// Copyright 2017 <Jozef Brandys>
#ifndef PINTOOL_CLUSTERING_H_
#define PINTOOL_CLUSTERING_H_

#include <glog/logging.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "Cluster.h"
#include "Trace_binary.h"
#include "Cluster_clasifier.h"

#include <boost/dynamic_bitset.hpp>
#include <boost/iostreams/device/file_descriptor.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/zlib.hpp>

namespace io = boost::iostreams;

// Counters filled in by Clustering::FindCluster().
//
// Both vectors are indexed by the number of clusters whose IsInCluster()
// test accepted the looked-up trace, and both start with 100 zeroed buckets.
class ClusteringStats {
 public:
  ClusteringStats() : numBelongs(100, 0), numBelongsFailed(100, 0) {}

  // numBelongs[k]: lookups in which the trace fell inside exactly k clusters.
  std::vector<int> numBelongs;
  // numBelongsFailed[k]: the subset of those lookups that returned no cluster.
  std::vector<int> numBelongsFailed;
};

class Clustering {
 private:
  std::string clasifier_ = kClusterClasifierNone;
  std::set<std::shared_ptr<Cluster>> clusters;

  typedef std::pair<double,
    std::pair<std::weak_ptr<Cluster>, std::weak_ptr<Cluster>>> pq_item;
  static bool cmpPQ_(pq_item a, pq_item b) { return a.first >= b.first; }
  std::priority_queue<pq_item, std::vector<pq_item>, decltype(&cmpPQ_)>
      pq{cmpPQ_};

 public:
  ClusteringStats stats;

  Clustering(const std::vector<Trace_binary> &traces, std::string clasifier,
    std::string &outputDir) : clasifier_(clasifier) {
    LOG(INFO) << "Computing from " << traces.size() << " traces..." << "\n";
    computeAllDistances(traces);
    computeClusters(traces, outputDir);
  }

  explicit Clustering(std::ifstream &in_file) {
    io::filtering_istream in_filter;
    in_filter.push(io::zlib_decompressor());
    in_filter.push(in_file);
    std::istream in(in_filter.rdbuf());

    LOG(INFO) << "Reading clustering from file" << std::endl;
    int num_clusters;
    in >> num_clusters;
    in >> clasifier_;
    for (int i = 0; i < num_clusters; i++) {
      LOG(INFO) << "Reading cluster " << i <<  " / " << num_clusters << "\n";
      auto cl = std::make_shared<Cluster>();
      cl->Read(in, clasifier_);
      clusters.insert(cl);
    }
  }

  void ChangeClasifier(std::string clasifier) {
    clasifier_ = clasifier;
  }

  std::shared_ptr<Cluster> FindCluster(const Trace_binary &trace) {
    int belongsTo = 0;
    double minimumDistance = 0;
    for (auto &cl : clusters) {
      if (cl->IsInCluster(trace)) {
      double actualDistance = cl->distance(trace);
      if (!belongsTo) {
        minimumDistance = actualDistance;
      }
      minimumDistance = std::min(minimumDistance, actualDistance);
      belongsTo++;
      }
    }
    stats.numBelongs[belongsTo]++;

    for (auto &cl : clusters) {
      if (cl->IsInCluster(trace) &&
        abs(cl->distance(trace) - minimumDistance) < 0.001 &&
        cl->AcceptsTrace(trace)) {
      return cl;
      }
    }
    stats.numBelongsFailed[belongsTo]++;
    return std::shared_ptr<Cluster>(nullptr);
  }

  void RunClusterTraining(const std::vector<Trace_binary> &traces,
    std::ostream &out_file) {
    /* Prepare for output */
    io::filtering_ostream out_filter;
    out_filter.push(io::zlib_compressor());
    out_filter.push(out_file);
    std::ostream out(out_filter.rdbuf());

    out << clusters.size() << std::endl;
    out << clasifier_ << std::endl;

    LOG(INFO) << "RunClusterTraining Begin";
    for (auto cl : clusters) {
      cl->ClearTraces();
    }

    // Rozdel jednotlive behy do trace
    for (auto &trace : traces) {
      double best = 1023456789;
      for (auto cl : clusters) {
        if (cl->IsInCluster(trace)) {
          best = std::min(best, cl->distance(trace));
        }
      }
      for (auto &cl : clusters) {
        if (cl->IsInCluster(trace) && abs(cl->distance(trace) - best) < 0.001) {
          cl->addTrace(trace);
        }
      }
      if (best == 1023456789) {
      LOG(FATAL) << "NO cluster";
      }
    }
    LOG(INFO) << "RunClusterTraining Dividing clusters finished";
    int counter = 0;
    for (auto cl : clusters) {
      LOG(INFO) << "Training cluster " << ++counter << " / " << clusters.size();
      cl->TrainCluster(clasifier_);
      cl->Print(out);
      cl->ClearTraces();
    }
    LOG(INFO) << "RunClusterTraining End";
  }

  void PrintStats(std::ostream &out, const std::vector<Trace_binary> &traces) {
    out << "Cluster sizes   : ";
    for (auto &cl : clusters) {
      out << " " << cl->NumTraces();
    }
    out << std::endl;
    out << "Cluster accetps : ";
    for (auto &cl : clusters) {
      int size = 0;
      for (auto &trace : traces) {
        if (cl->IsInCluster(trace)) {
          size++;
        }
      }
      out << " " << size;
    }
    out << std::endl;
  }

 private:
  // Precomputes distances between all traces
  void computeAllDistances(const std::vector<Trace_binary> &traces) {
    for (auto &trace : traces) {
      clusters.insert(std::make_shared<Cluster>(trace));
    }
    for (auto &clA : clusters) {
      for (auto &clB : clusters) {
        if (clA == clB) continue;
        pq.emplace(Cluster::distance(*clA, *clB),
          make_pair(wpCluster(clA), wpCluster(clB)));
      }
    }
  }

  void computeClusters(const std::vector<Trace_binary> &traces,
      const std::string &outputDir) {
    while (!pq.empty() && clusters.size() != 1) {
      std::cerr << "\rActual number of clusters: " << clusters.size() << " " <<
          pq.size() << " " << pq.top().first << "       ";
      auto akt = pq.top(); pq.pop();
      if (akt.second.first.expired() || akt.second.second.expired()) {
        continue;
      }

      std::shared_ptr<Cluster> clA = akt.second.first.lock(),
        clB = akt.second.second.lock();
      auto clNew = std::make_shared<Cluster>(clA, clB);
      clusters.erase(clA);
      clusters.erase(clB);
      for (auto &cl : clusters) {
        pq.emplace(
          Cluster::distance(*cl, *clNew) * Cluster::penalty(*cl, *clNew),
          make_pair(wpCluster(clNew), wpCluster(cl)));
      }
      clusters.insert(clNew);

      if (clusters.size() < 50) {
        char f_name[100];
        snprintf(f_name, sizeof(f_name), "/clusters.%05lu", clusters.size());
        std::ofstream out(outputDir + f_name);
        if (!out) {
          LOG(FATAL) << "Cannot open " << outputDir + f_name << std::endl;
        }
        RunClusterTraining(traces, out);
      }
    }
    while (!pq.empty()) pq.pop();
    std::cerr << "\nClustering is finished." << std::endl;
  }
};

#endif  // PINTOOL_CLUSTERING_H_
