From 8d39a5ddf0a27403abb34042304d026d234f92b0 Mon Sep 17 00:00:00 2001
From: Zhaocheng Zhu <healmysoul@163.com>
Date: Mon, 5 Aug 2019 14:07:22 -0400
Subject: [PATCH 1/2] add paper links

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 47fd417..388c63d 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,9 @@ Citation
 --------
 
 If you find GraphVite useful for your research or development, please cite the
-following paper.
+following [paper].
+
+[paper]: https://arxiv.org/pdf/1903.00757.pdf
 
 ```
 @inproceedings{zhu2019graphvite,

From 000f1797f0a92de609d9a75805a17668f385c463 Mon Sep 17 00:00:00 2001
From: Zhaocheng Zhu <healmysoul@163.com>
Date: Wed, 7 Aug 2019 15:07:46 -0400
Subject: [PATCH 2/2] fix knowledge graph initialization

---
 include/bind.h                       | 29 +++++++++++++++-------------
 include/core/solver.h                | 23 ++++++++++++++--------
 include/instance/graph.cuh           | 11 ++++++-----
 include/instance/knowledge_graph.cuh | 11 +++++++----
 include/instance/visualization.cuh   |  9 +++++----
 5 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/include/bind.h b/include/bind.h
index cfc2fc3..807855e 100644
--- a/include/bind.h
+++ b/include/bind.h
@@ -449,12 +449,12 @@ class pyGraphSolver : public py::class_<graphvite::GraphSolver<dim, Float, Index
             )");
 
         def("train", &GraphSolver::train, py::no_gil(),
-            py::arg("model") = "LINE", py::arg("num_epoch") = 2000, py::arg("augmentation_step") = 5,
-            py::arg("random_walk_length") = 40, py::arg("random_walk_batch_size") = 100,
-            py::arg("shuffle_base") = graphvite::kAuto, py::arg("p") = 1, py::arg("q") = 1,
-            py::arg("positive_reuse") = 1, py::arg("negative_sample_exponent") = 0.75, py::arg("negative_weight") = 5,
-            py::arg("log_frequency") = 1000,
-            "train(model='LINE', num_epoch=2000, augmentation_step=5, random_walk_length=40, "
+            py::arg("model") = "LINE", py::arg("num_epoch") = 2000, py::arg("resume") = false,
+            py::arg("augmentation_step") = 5, py::arg("random_walk_length") = 40,
+            py::arg("random_walk_batch_size") = 100, py::arg("shuffle_base") = graphvite::kAuto, py::arg("p") = 1,
+            py::arg("q") = 1, py::arg("positive_reuse") = 1, py::arg("negative_sample_exponent") = 0.75,
+            py::arg("negative_weight") = 5, py::arg("log_frequency") = 1000,
+            "train(model='LINE', num_epoch=2000, resume=False, augmentation_step=5, random_walk_length=40, "
                   "random_walk_batch_size=100, shuffle_base=auto, p=1, q=1, positive_reuse=1, "
                   "negative_sample_exponent=0.75, negative_weight=5, log_frequency=1000)"
             R"(
@@ -463,6 +463,7 @@ class pyGraphSolver : public py::class_<graphvite::GraphSolver<dim, Float, Index
             Parameters:
                 model (str, optional): 'DeepWalk', 'LINE' or 'node2vec'
                 num_epoch (int, optional): number of epochs, i.e. #positive edges / \|E\|
+                resume (bool, optional): resume training from learned embeddings or not
                 augmentation_step (int, optional):
                     node pairs with distance <= augmentation_step are considered as positive samples
                 random_walk_length (int, optional): length of each random walk
@@ -562,17 +563,18 @@ class pyKnowledgeGraphSolver : public py::class_<graphvite::KnowledgeGraphSolver
             )");
 
         def("train", &KnowledgeGraphSolver::train, py::no_gil(),
-            py::arg("model") = "RotatE", py::arg("num_epoch") = 2000, py::arg("margin") = 24,
+            py::arg("model") = "RotatE", py::arg("num_epoch") = 2000, py::arg("resume") = false, py::arg("margin") = 24,
             py::arg("l3_regularization") = 2e-3, py::arg("sample_batch_size") = 2000, py::arg("positive_reuse") = 1,
             py::arg("adversarial_temperature") = 2, py::arg("log_frequency") = 100,
-            "train(model='RotatE', num_epoch=2000, margin=24, l3_regulariation=2e-3, sample_batch_size=2000, "
-                  "positive_reuse=1, adversarial_temperature=2, log_frequency=100)"
+            "train(model='RotatE', num_epoch=2000, resume=False, margin=24, l3_regularization=2e-3, "
+                  "sample_batch_size=2000, positive_reuse=1, adversarial_temperature=2, log_frequency=100)"
             R"(
             Train knowledge graph embeddings.
 
             Parameters:
                 model (str, optional): 'TransE', 'DistMult', 'ComplEx', 'SimplE' or 'RotatE'
                 num_epoch (int, optional): number of epochs, i.e. #positive edges / \|E\|
+                resume (bool, optional): resume training from learned embeddings or not
                 margin (float, optional): logit margin (for TransE & RotatE)
                 l3_regularization (float, optional): L3 regularization (for DistMult, ComplEx & SimplE)
                 sample_batch_size (int, optional): batch size of samples in samplers
@@ -665,10 +667,10 @@ class pyVisualizationSolver : public py::class_<graphvite::VisualizationSolver<d
             )");
 
         def("train", &VisualizationSolver::train, py::no_gil(),
-            py::arg("model") = "LargeVis", py::arg("num_epoch") = 50, py::arg("sample_batch_size") = 2000,
-            py::arg("positive_reuse") = 5, py::arg("negative_sample_exponent") = 0.75, py::arg("negative_weight") = 3,
-            py::arg("log_frequency") = 1000,
-            "train(model='LargeVis', num_epoch=100, sample_batch_size=2000, positive_reuse=1, "
+            py::arg("model") = "LargeVis", py::arg("num_epoch") = 50, py::arg("resume") = false,
+            py::arg("sample_batch_size") = 2000, py::arg("positive_reuse") = 5,
+            py::arg("negative_sample_exponent") = 0.75, py::arg("negative_weight") = 3, py::arg("log_frequency") = 1000,
+            "train(model='LargeVis', num_epoch=50, resume=False, sample_batch_size=2000, positive_reuse=5, "
                   "negative_sample_exponent=0.75, negative_weight=3, log_frequency=1000)"
             R"(
             Train visualization.
@@ -676,6 +678,7 @@ class pyVisualizationSolver : public py::class_<graphvite::VisualizationSolver<d
             Parameters:
                 model (str, optional): 'LargeVis'
                 num_epoch (int, optional): number of epochs, i.e. #positive edges / \|E\|
+                resume (bool, optional): resume training from learned embeddings or not
                 sample_batch_size (int, optional): batch size of samples in samplers
                 positive_reuse (int, optional): times of reusing positive samples
                 negative_sample_epoxnent (float, optional): exponent of degrees in negative sampling
diff --git a/include/core/solver.h b/include/core/solver.h
index a6c648e..5936ec0 100644
--- a/include/core/solver.h
+++ b/include/core/solver.h
@@ -99,7 +99,7 @@ class SolverMixin {
     float negative_sample_exponent, negative_weight;
     int num_epoch, episode_size, batch_size, positive_reuse;
     int log_frequency;
-    bool shuffle_partition, naive_parallel;
+    bool shuffle_partition, naive_parallel, resume;
     int assignment_offset;
     std::vector<std::shared_ptr<std::vector<Vector>>> embeddings;
     std::vector<std::shared_ptr<std::vector<std::vector<Vector>>>> moments;
@@ -143,6 +143,7 @@ class SolverMixin {
     using type::negative_sample_exponent; \
     using type::negative_weight; \
     using type::num_epoch; \
+    using type::resume; \
     using type::episode_size; \
     using type::batch_size; \
     using type::optimizer; \
@@ -283,6 +284,7 @@ class SolverMixin {
                 << ", but " << batch_size << " is specified";
         episode_size = _episode_size;
         available_models = get_available_models();
+        batch_id = 0;
 
         // build embeddings & moments
         protocols = get_protocols();
@@ -338,8 +340,6 @@ class SolverMixin {
                 << "The negative sampler can't be binded to global range and any partition at the same time";
 
         build_alias();
-        init_embeddings();
-        init_moments();
 
         auto min_partition = get_min_partition();
         if (num_partition == kAuto) {
@@ -442,6 +442,7 @@ class SolverMixin {
         ss << "model: " << model << std::endl;
         ss << optimizer.info() << std::endl;
         ss << "#epoch: " << num_epoch << ", batch size: " << batch_size << std::endl;
+        ss << "resume: " << pretty::yes_no(resume) << std::endl;
         ss << "positive reuse: " << positive_reuse << ", negative weight: " << negative_weight;
         return ss.str();
     }
@@ -522,18 +523,21 @@ class SolverMixin {
      * @brief Train graph embeddings
      * @param _model model
      * @param _num_epoch number of epochs, i.e. #positive edges / |E|
+     * @param _resume resume training from learned embeddings or not
      * @param _sample_batch_size batch size of samples in samplers
      * @param _positive_reuse times of reusing positive samples
      * @param _negative_sample_exponent exponent of degrees in negative sampling
      * @param _negative_weight weight for each negative sample
      * @param _log_frequency log every log_frequency batches
      */
-    void train(const std::string &_model, int _num_epoch = 2000, int _sample_batch_size = 2000, int _positive_reuse = 1,
-               float _negative_sample_exponent = 0.75, float _negative_weight = 5, int _log_frequency = 1000) {
+    void train(const std::string &_model, int _num_epoch = 2000, bool _resume = false, int _sample_batch_size = 2000,
+               int _positive_reuse = 1, float _negative_sample_exponent = 0.75, float _negative_weight = 5,
+               int _log_frequency = 1000) {
         CHECK(graph) << "The model must be built on a graph first";
         model = _model;
         CHECK(available_models.find(model) != available_models.end()) << "Invalid model `" << model << "`";
         num_epoch = _num_epoch;
+        resume = _resume;
         sample_batch_size = _sample_batch_size;
         positive_reuse = _positive_reuse;
         negative_sample_exponent = _negative_sample_exponent;
@@ -544,9 +548,12 @@ class SolverMixin {
         log_frequency = _log_frequency;
 
         LOG(WARNING) << pretty::block(info());
-
-        batch_id = 0;
-        num_batch = num_epoch * num_edge / batch_size;
+        if (!resume) {
+            init_embeddings();
+            init_moments();
+            batch_id = 0;
+        }
+        num_batch = batch_id + num_epoch * num_edge / batch_size;
 
         std::vector<std::thread> sample_threads(num_sampler);
         std::vector<std::thread> worker_threads(num_worker);
diff --git a/include/instance/graph.cuh b/include/instance/graph.cuh
index 6840f87..3bf7df3 100644
--- a/include/instance/graph.cuh
+++ b/include/instance/graph.cuh
@@ -714,6 +714,7 @@ public:
      * @brief Train node embeddings
      * @param _model "DeepWalk", "LINE" or "node2vec"
      * @param _num_epoch number of epochs, i.e. #positive edges / |E|
+     * @param _resume resume training from learned embeddings or not
      * @param _augmentation_step node pairs with distance <= augmentation_step are considered as positive samples
      * @param _random_walk_length length of each random walk
      * @param _random_walk_batch_size batch size of random walks in samplers
@@ -725,10 +726,10 @@ public:
      * @param _negative_weight weight for each negative sample
      * @param _log_frequency log every log_frequency batches
      */
-    void train(const std::string &_model = "LINE", int _num_epoch = 2000, int _augmentation_step = 5,
-               int _random_walk_length = 40, int _random_walk_batch_size = 100, int _shuffle_base = kAuto,
-               float _p = 1, float _q = 1, int _positive_reuse = 1, float _negative_sample_exponent = 0.75,
-               float _negative_weight = 5, int _log_frequency = 1000) {
+    void train(const std::string &_model = "LINE", int _num_epoch = 2000, bool _resume = false,
+               int _augmentation_step = 5, int _random_walk_length = 40, int _random_walk_batch_size = 100,
+               int _shuffle_base = kAuto, float _p = 1, float _q = 1, int _positive_reuse = 1,
+               float _negative_sample_exponent = 0.75, float _negative_weight = 5, int _log_frequency = 1000) {
         augmentation_step = _augmentation_step;
         random_walk_length = _random_walk_length;
         random_walk_batch_size = _random_walk_batch_size;
@@ -744,7 +745,7 @@ public:
         CHECK(augmentation_step <= random_walk_length)
                 << "`random_walk_length` should be no less than `augmentation_step`";
 
-        Base::train(_model, _num_epoch, random_walk_length * random_walk_batch_size, _positive_reuse,
+        Base::train(_model, _num_epoch, _resume, random_walk_length * random_walk_batch_size, _positive_reuse,
                     _negative_sample_exponent, _negative_weight, _log_frequency);
     }
 
diff --git a/include/instance/knowledge_graph.cuh b/include/instance/knowledge_graph.cuh
index 0b90bd9..c94f325 100644
--- a/include/instance/knowledge_graph.cuh
+++ b/include/instance/knowledge_graph.cuh
@@ -550,6 +550,7 @@ public:
         ss << "model: " << model << std::endl;
         ss << optimizer.info() << std::endl;
         ss << "#epoch: " << num_epoch << ", batch size: " << batch_size << std::endl;
+        ss << "resume: " << pretty::yes_no(resume) << std::endl;
         if (model == "TransE" || model == "RotatE")
             ss << "margin: " << margin << ", positive reuse: " << positive_reuse << std::endl;
         if (model == "DistMult" || model == "ComplEx" || model == "SimplE")
@@ -562,6 +563,7 @@ public:
      * @brief Train knowledge graph embeddings
      * @param _model "TransE", "DistMult", "ComplEx", "SimplE" or "RotatE"
      * @param _num_epoch number of epochs, i.e. #positive edges / |E|
+     * @param _resume resume training from learned embeddings or not
      * @param _margin logit margin (for TransE & RotatE)
      * @param _l3_regularization l3 regularization (for DistMult, ComplEx & SimplE)
      * @param _sample_batch_size batch size of samples in samplers
@@ -570,14 +572,15 @@ public:
      *     disabled when set to non-positive value
      * @param _log_frequency log every log_frequency batches
      */
-    void train(const std::string &_model = "RotatE", int _num_epoch = 2000, float _margin = 24,
-               float _l3_regularization = 2e-3, int _sample_batch_size = 2000,
-               int _positive_reuse = 1, float _adversarial_temperature = 2, int _log_frequency = 100) {
+    void train(const std::string &_model = "RotatE", int _num_epoch = 2000, bool _resume = false, float _margin = 24,
+               float _l3_regularization = 2e-3, int _sample_batch_size = 2000, int _positive_reuse = 1,
+               float _adversarial_temperature = 2, int _log_frequency = 100) {
         margin = _margin;
         l3_regularization = _l3_regularization;
         adversarial_temperature = _adversarial_temperature;
 
-        Base::train(_model, _num_epoch, _sample_batch_size, _positive_reuse, 0, 1.0f / num_negative, _log_frequency);
+        Base::train(_model, _num_epoch, _resume, _sample_batch_size, _positive_reuse, 0, 1.0f / num_negative,
+                    _log_frequency);
     }
 };
 
diff --git a/include/instance/visualization.cuh b/include/instance/visualization.cuh
index 299077b..92da778 100644
--- a/include/instance/visualization.cuh
+++ b/include/instance/visualization.cuh
@@ -526,16 +526,17 @@ public:
      * @brief Train visualization
      * @param _model "LargeVis"
      * @param _num_epoch number of epochs, i.e. #positive edges / |E|
+     * @param _resume resume training from learned embeddings or not
      * @param _sample_batch_size batch size of samples in samplers
      * @param _positive_reuse times of reusing positive samples
      * @param _negative_sample_exponent exponent of degrees in negative sampling
      * @param _negative_weight weight for each negative sample
      * @param _log_frequency log every log_frequency batches
      */
-    void train(const std::string &_model = "LargeVis", int _num_epoch = 50, int _sample_batch_size = 2000,
-               int _positive_reuse = 5, float _negative_sample_exponent = 0.75, float _negative_weight = 3,
-               int _log_frequency = 1000) {
-        Base::train(_model, _num_epoch, _sample_batch_size, _positive_reuse, _negative_sample_exponent,
+    void train(const std::string &_model = "LargeVis", int _num_epoch = 50, bool _resume = false,
+               int _sample_batch_size = 2000, int _positive_reuse = 5, float _negative_sample_exponent = 0.75,
+               float _negative_weight = 3, int _log_frequency = 1000) {
+        Base::train(_model, _num_epoch, _resume, _sample_batch_size, _positive_reuse, _negative_sample_exponent,
                     _negative_weight, _log_frequency);
     }
 };