[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Monotone-devel] Speed of Annotate
From: |
Thomas Moschny |
Subject: |
Re: [Monotone-devel] Speed of Annotate |
Date: |
Wed, 21 Feb 2007 10:35:36 +0100 |
User-agent: |
KMail/1.9.6 |
On Tue 2007-02-06, Thomas Moschny wrote:
> So, there was the idea of looking directly at the roster deltas instead.
> The head version of n.v.m.experiment.annotate.roster_deltas does exactly
> that. It introduces a way to extract node-specific changes from a roster
> delta, and uses these methods in the annotate code. It also avoids fetching
> the same information twice, as the old code did.
Attached is the current diff of that branch against mainline. If no one
objects, I am going to merge it.
- Thomas
#
#
# add_dir "tests/annotate_accidental_clean_merge"
#
# add_file "tests/annotate_accidental_clean_merge/__driver__.lua"
# content [63b05cf0e5ab7d9455d89b319d505071d9672063]
#
# patch "annotate.cc"
# from [5a0e7200fad7865dc1ce9b30aaa531d0f6f7b542]
# to [27e0f14d529224e51282f455b80a804585aa697b]
#
# patch "database.cc"
# from [cac4ab9d4b50bd45c5888e84619bc6f342570367]
# to [51154627ac739699fe09fe49e7953757d7804478]
#
# patch "database.hh"
# from [ea6f156a45cccbf101e7f6befe00c77b8496def5]
# to [d098635da9410b7c7ecd5a05ead436d51a54a1a3]
#
# patch "roster_delta.cc"
# from [66a0a455754209bea533db586769e37b9bd18ca3]
# to [4d06fa6a95df2ff254ce9572a57bc0b0d1bb7e52]
#
# patch "roster_delta.hh"
# from [25e389e2c42e9754480cb7f5e43157623eeaf1fa]
# to [85506c111153554e2184a41cae920e4198890bfd]
#
============================================================
--- tests/annotate_accidental_clean_merge/__driver__.lua
63b05cf0e5ab7d9455d89b319d505071d9672063
+++ tests/annotate_accidental_clean_merge/__driver__.lua
63b05cf0e5ab7d9455d89b319d505071d9672063
@@ -0,0 +1,64 @@
+-- -*-lua-*-
+--
+-- We are testing annotate on this graph:
+--
+-- a
+-- /|\
+-- / | \
+-- b* c* d*
+-- | | |
+-- e* e* e*
+-- \ \/
+-- \ e
+-- \ /
+-- e
+-- |
+-- f*
+--
+-- The current annotate code will arbitrarily select one of the marked
+-- e-revisions for lines coming from e.
+--
+
+mtn_setup()
+
+addfile("foo", "first\nfoo\nthird\n")
+commit()
+rev_a = base_revision()
+
+econtents = "first\nsecond\nthird\n"
+
+writefile("foo", "first\nb\nthird\n")
+commit()
+writefile("foo", econtents)
+commit()
+rev_e1 = base_revision()
+
+revert_to(rev_a)
+writefile("foo", "first\nc\nthird\n")
+commit()
+writefile("foo", econtents)
+commit()
+rev_e2 = base_revision()
+
+revert_to(rev_a)
+writefile("foo", "first\nd\nthird\n")
+commit()
+writefile("foo", econtents)
+commit()
+rev_e3 = base_revision()
+
+check(mtn("merge"), 0, false, false)
+check(mtn("update"), 0, false, false)
+
+writefile("foo", econtents .. "fourth\n")
+commit()
+rev_f = base_revision()
+
+check(mtn("annotate", "foo", "--brief"), 0, true, true)
+
+check(qgrep(rev_a .. ": first", "stdout"))
+check(qgrep(rev_e1 .. ": second", "stdout")
+ or qgrep(rev_e2 .. ": second", "stdout")
+ or qgrep(rev_e3 .. ": second", "stdout"))
+check(qgrep(rev_a .. ": third", "stdout"))
+check(qgrep(rev_f .. ": fourth", "stdout"))
============================================================
--- annotate.cc 5a0e7200fad7865dc1ce9b30aaa531d0f6f7b542
+++ annotate.cc 27e0f14d529224e51282f455b80a804585aa697b
@@ -160,12 +160,18 @@ struct annotate_node_work
annotate_node_work(shared_ptr<annotate_context> annotations_,
shared_ptr<annotate_lineage_mapping> lineage_,
revision_id revision_, node_id fid_,
- rev_height height_)
+ rev_height height_,
+ set<revision_id> interesting_ancestors_,
+ file_id content_,
+ bool marked_)
: annotations(annotations_),
lineage(lineage_),
revision(revision_),
fid(fid_),
- height(height_)
+ height(height_),
+ interesting_ancestors(interesting_ancestors_),
+ content(content_),
+ marked(marked_)
{}
shared_ptr<annotate_context> annotations;
@@ -173,10 +179,18 @@ struct annotate_node_work
revision_id revision;
node_id fid;
rev_height height;
+ set<revision_id> interesting_ancestors;
+ file_id content;
+ bool marked;
};
struct by_rev {};
+// instead of using a priority queue and a set to keep track of the already
+// seen revisions, we use a multi index container. it stores work units
+// indexed by both their revision and their revision's height, with the
+// latter being used by default. usage of that data structure frees us from
+// the burden of keeping two data structures in sync.
typedef multi_index_container<
annotate_node_work,
indexed_by<
@@ -662,74 +676,74 @@ annotate_lineage_mapping::set_copied_all
}
}
-static void
-do_annotate_node
-(annotate_node_work const & work_unit,
- app_state & app,
- work_units & work_units)
+// fetches the list of file_content markings for the given revision_id and
+// node_id
+static void get_file_content_marks(app_state & app,
+ revision_id const & rev,
+ node_id const & fid,
+ set<revision_id> & content_marks)
{
- L(FL("do_annotate_node for node %s") % work_unit.revision);
+ marking_t markings;
+ app.db.get_markings(rev, fid, markings);
- roster_t roster;
- marking_t marks;
+ I(!markings.file_content.empty());
- {
- marking_map markmap;
- app.db.get_roster(work_unit.revision, roster, markmap);
+ content_marks.clear();
+ content_marks.insert(markings.file_content.begin(),
+ markings.file_content.end());
+}
- map<node_id, marking_t>::const_iterator mmi =
- markmap.find(work_unit.fid);
- I(mmi != markmap.end());
- marks = mmi->second;
- }
+static void
+do_annotate_node(annotate_node_work const & work_unit,
+ app_state & app,
+ work_units & work_units)
+{
+ L(FL("do_annotate_node for node %s") % work_unit.revision);
- I(marks.file_content.size() != 0);
-
- set<revision_id> parents; // fixme: rename
-
- if (marks.file_content.size() == 1
- && *(marks.file_content.begin()) == work_unit.revision)
- {
- // this node is marked, need to inspect the parents
- app.db.get_revision_parents(work_unit.revision, parents);
- }
- else
- {
- // jump directly to the marked ancestors
- parents = marks.file_content;
- }
-
size_t added_in_parent_count = 0;
- for (set<revision_id>::const_iterator i = parents.begin();
- i != parents.end(); i++)
+ for (set<revision_id>::const_iterator i =
work_unit.interesting_ancestors.begin();
+ i != work_unit.interesting_ancestors.end(); i++)
{
+ // here, 'parent' means either a real parent or one of the marked
+ // ancestors, depending on whether work_unit.marked is true.
revision_id parent_revision = *i;
- roster_t parent_roster;
L(FL("do_annotate_node processing edge from parent %s to child %s")
% parent_revision % work_unit.revision);
I(!(work_unit.revision == parent_revision));
- app.db.get_roster(parent_revision, parent_roster);
- if (!parent_roster.has_node(work_unit.fid))
+ file_id file_in_parent;
+
+ work_units::index<by_rev>::type::iterator lmn =
+ work_units.get<by_rev>().find(parent_revision);
+
+ // find out the content hash of the file in parent.
+ if (lmn != work_units.get<by_rev>().end())
+ // we already got the content hash.
+ file_in_parent = lmn->content;
+ else
{
+ if (work_unit.marked)
+ app.db.get_file_content(parent_revision, work_unit.fid,
file_in_parent);
+ else
+ // we are not marked, so parent is marked.
+ file_in_parent = work_unit.content;
+ }
+
+ // stop if file is not present in the parent.
+ if (null_id(file_in_parent))
+ {
L(FL("file added in %s, continuing") % work_unit.revision);
added_in_parent_count++;
continue;
}
-
- // The node was live in the parent, so this represents a delta.
- file_t file_in_child =
- downcast_to_file_t(roster.get_node(work_unit.fid));
-
- file_t file_in_parent =
- downcast_to_file_t(parent_roster.get_node(work_unit.fid));
-
+
+ // the node was live in the parent, so this represents a delta.
shared_ptr<annotate_lineage_mapping> parent_lineage;
- if (file_in_parent->content == file_in_child->content)
+ if (file_in_parent == work_unit.content)
{
L(FL("parent file identical, "
"set copied all mapped and copy lineage\n"));
@@ -739,9 +753,9 @@ do_annotate_node
else
{
file_data data;
- app.db.get_file_version(file_in_parent->content, data);
+ app.db.get_file_version(file_in_parent, data);
L(FL("building parent lineage for parent file %s")
- % file_in_parent->content);
+ % file_in_parent);
parent_lineage
= work_unit.lineage->build_parent_lineage(work_unit.annotations,
parent_revision,
@@ -750,26 +764,41 @@ do_annotate_node
// If this parent has not yet been queued for processing, create the
// work unit for it.
- work_units::index<by_rev>::type::iterator lmn =
- work_units.get<by_rev>().find(parent_revision);
+ if (lmn == work_units.get<by_rev>().end())
+ {
+ set<revision_id> parents_interesting_ancestors;
+ bool parent_marked;
- if (lmn == work_units.get<by_rev>().end()) // not yet seen
- {
- // Once we move on to processing the parent that this file was
- // renamed from, we'll need the old name
+ if (work_unit.marked)
+ {
+ // we are marked, thus we don't know a priori whether parent
+ // is marked or not.
+ get_file_content_marks(app, parent_revision, work_unit.fid,
parents_interesting_ancestors);
+ parent_marked = (parents_interesting_ancestors.size() == 1
+ && *(parents_interesting_ancestors.begin()) ==
parent_revision);
+ }
+ else
+ parent_marked = true;
+
+ // if it's marked, we need to look at its parents instead.
+ if (parent_marked)
+ app.db.get_revision_parents(parent_revision,
parents_interesting_ancestors);
+
rev_height parent_height;
app.db.get_rev_height(parent_revision, parent_height);
annotate_node_work newunit(work_unit.annotations,
parent_lineage,
parent_revision,
work_unit.fid,
- parent_height);
-
+ parent_height,
+ parents_interesting_ancestors,
+ file_in_parent,
+ parent_marked);
work_units.insert(newunit);
}
else
{
- // Already a pending node, so we just have to merge the lineage.
+ // already a pending node, so we just have to merge the lineage.
L(FL("merging lineage from node %s to parent %s")
% work_unit.revision % parent_revision);
@@ -777,7 +806,7 @@ do_annotate_node
}
}
- if (added_in_parent_count == parents.size())
+ if (added_in_parent_count == work_unit.interesting_ancestors.size())
{
work_unit.lineage->credit_mapped_lines(work_unit.annotations);
}
@@ -799,9 +828,18 @@ do_annotate (app_state &app, file_t file
work_units work_units;
{
+ // prepare the first work_unit
rev_height height;
app.db.get_rev_height(rid, height);
- annotate_node_work workunit(acp, lineage, rid, file_node->self, height);
+ set<revision_id> rids_interesting_ancestors;
+ get_file_content_marks(app, rid, file_node->self,
rids_interesting_ancestors);
+ bool rid_marked = (rids_interesting_ancestors.size() == 1
+ && *(rids_interesting_ancestors.begin()) == rid);
+ if (rid_marked)
+ app.db.get_revision_parents(rid, rids_interesting_ancestors);
+
+ annotate_node_work workunit(acp, lineage, rid, file_node->self, height,
+ rids_interesting_ancestors,
file_node->content, rid_marked);
work_units.insert(workunit);
}
============================================================
--- database.cc cac4ab9d4b50bd45c5888e84619bc6f342570367
+++ database.cc 51154627ac739699fe09fe49e7953757d7804478
@@ -1394,7 +1394,148 @@ struct roster_reconstruction_graph : pub
}
};
+struct database::extractor
+{
+ virtual bool look_at_delta(roster_delta const & del) = 0;
+ virtual void look_at_roster(roster_t const & roster, marking_map const & mm)
= 0;
+ virtual ~extractor() {};
+};
+
+struct database::markings_extractor : public database::extractor
+{
+private:
+ node_id const & nid;
+ marking_t & markings;
+
+public:
+ markings_extractor(node_id const & _nid, marking_t & _markings) :
+ nid(_nid), markings(_markings) {} ;
+
+ bool look_at_delta(roster_delta const & del)
+ {
+ return try_get_markings_from_roster_delta(del, nid, markings);
+ }
+
+ void look_at_roster(roster_t const & roster, marking_map const & mm)
+ {
+ map<node_id, marking_t>::const_iterator mmi =
+ mm.find(nid);
+ I(mmi != mm.end());
+ markings = mmi->second;
+ }
+};
+
+struct database::file_content_extractor : database::extractor
+{
+private:
+ node_id const & nid;
+ file_id & content;
+
+public:
+ file_content_extractor(node_id const & _nid, file_id & _content) :
+ nid(_nid), content(_content) {} ;
+
+ bool look_at_delta(roster_delta const & del)
+ {
+ return try_get_content_from_roster_delta(del, nid, content);
+ }
+
+ void look_at_roster(roster_t const & roster, marking_map const & mm)
+ {
+ if (roster.has_node(nid))
+ content = downcast_to_file_t(roster.get_node(nid))->content;
+ else
+ content = file_id();
+ }
+};
+
void
+database::extract_from_deltas(revision_id const & id, extractor & x)
+{
+ reconstruction_path selected_path;
+ {
+ roster_reconstruction_graph graph(*this);
+ {
+ // we look at the nearest delta(s) first, without constructing the
+ // whole path, as that would be a rather expensive operation.
+ //
+ // the reason why this strategy is worth the effort is that in most
+ // cases we are looking at the parent of a (content-)marked node, thus
+ // the information we are looking for is right there in the delta
+ // leading to this node.
+ //
+ // recording the deltas visited here in a set so as to avoid inspecting
+ // them later seems to be of little value, as it imposes a cost here,
+ // but can seldom be exploited.
+ set<string> deltas;
+ graph.get_next(id.inner()(), deltas);
+ for (set<string>::const_iterator i = deltas.begin();
+ i != deltas.end(); ++i)
+ {
+ roster_delta del;
+ get_roster_delta(id.inner()(), *i, del);
+ bool found = x.look_at_delta(del);
+ if (found)
+ return;
+ }
+ }
+ get_reconstruction_path(id.inner()(), graph, selected_path);
+ }
+
+ int path_length(selected_path.size());
+ int i(0);
+ string target_rev;
+
+ for (reconstruction_path::const_iterator p = selected_path.begin();
+ p != selected_path.end(); ++p)
+ {
+ if (i > 0)
+ {
+ roster_delta del;
+ get_roster_delta(target_rev, *p, del);
+ bool found = x.look_at_delta(del);
+ if (found)
+ return;
+ }
+ if (i == path_length-1)
+ {
+ // last iteration, we have reached a roster base
+ roster_t roster;
+ marking_map mm;
+ get_roster_base(*p, roster, mm);
+ x.look_at_roster(roster, mm);
+ return;
+ }
+ target_rev = *p;
+ ++i;
+ }
+}
+
+void
+database::get_markings(revision_id const & id,
+ node_id const & nid,
+ marking_t & markings)
+{
+ markings_extractor x(nid, markings);
+ extract_from_deltas(id, x);
+}
+
+void
+database::get_file_content(revision_id const & id,
+ node_id const & nid,
+ file_id & content)
+{
+ // the imaginary root revision doesn't have any file.
+ if (null_id(id))
+ {
+ content = file_id();
+ return;
+ }
+ file_content_extractor x(nid, content);
+ extract_from_deltas(id, x);
+}
+
+void
database::get_roster_version(revision_id const & id,
cached_roster & cr)
{
============================================================
--- database.hh ea6f156a45cccbf101e7f6befe00c77b8496def5
+++ database.hh d098635da9410b7c7ecd5a05ead436d51a54a1a3
@@ -345,6 +345,20 @@ public:
bool roster_version_exists(revision_id const & ident);
void get_roster_ids(std::set<revision_id> & ids);
+ // using roster deltas
+ void get_markings(revision_id const & id,
+ node_id const & nid,
+ marking_t & markings);
+
+ void get_file_content(revision_id const & id,
+ node_id const & nid,
+ file_id & content);
+private:
+ struct extractor;
+ struct file_content_extractor;
+ struct markings_extractor;
+ void extract_from_deltas(revision_id const & id, extractor & x);
+
//
// --== Keys ==--
//
============================================================
--- roster_delta.cc 66a0a455754209bea533db586769e37b9bd18ca3
+++ roster_delta.cc 4d06fa6a95df2ff254ce9572a57bc0b0d1bb7e52
@@ -475,6 +475,16 @@ delta_rosters(roster_t const & from, mar
del = roster_delta(printer.buf);
}
+static
+void read_roster_delta(roster_delta const & del,
+ roster_delta_t & d)
+{
+ basic_io::input_source src(del.inner()(), "roster_delta");
+ basic_io::tokenizer tok(src);
+ basic_io::parser pars(tok);
+ parse_roster_delta_t(pars, d);
+}
+
void
apply_roster_delta(roster_delta const & del,
roster_t & roster, marking_map & markings)
@@ -483,15 +493,77 @@ apply_roster_delta(roster_delta const &
MM(roster);
MM(markings);
- basic_io::input_source src(del.inner()(), "roster_delta");
- basic_io::tokenizer tok(src);
- basic_io::parser pars(tok);
roster_delta_t d;
- parse_roster_delta_t(pars, d);
+ read_roster_delta(del, d);
d.apply(roster, markings);
}
+// Extract the marking for one node from the roster delta, or return false
+// if it is not contained in that delta
+bool
+try_get_markings_from_roster_delta(roster_delta const & del,
+ node_id const & nid,
+ marking_t & markings)
+{
+ roster_delta_t d;
+ read_roster_delta(del, d);
+ std::map<node_id, marking_t>::iterator i = d.markings_changed.find(nid);
+ if (i != d.markings_changed.end())
+ {
+ markings = i->second;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// Extract the content hash for one node from the roster delta -- if it is
+// available. If we know what the file_id was, we return true and set
+// 'content' appropriately. If we can prove that the file did not exist in
+// this revision, we return true and set 'content' to a null id. If we
+// cannot determine anything about the file contents, then we return false
+// -- in this case content is left undefined.
+bool
+try_get_content_from_roster_delta(roster_delta const & del,
+ node_id const & nid,
+ file_id & content)
+{
+ roster_delta_t d;
+ read_roster_delta(del, d);
+
+ roster_delta_t::deltas_applied_t::const_iterator i =
d.deltas_applied.find(nid);
+ if (i != d.deltas_applied.end())
+ {
+ content = i->second;
+ return true;
+ }
+
+ // special case 1: the node was deleted, so we know for sure it's not
+ // there anymore in this roster
+ if (d.nodes_deleted.find(nid) != d.nodes_deleted.end())
+ {
+ content = file_id();
+ return true;
+ }
+
+ // special case 2: the node was added, so we need to get the current
+ // content hash from the add stanza
+ for (roster_delta_t::files_added_t::const_iterator j = d.files_added.begin();
+ j != d.files_added.end(); ++j)
+ {
+ if (j->second.first == nid)
+ {
+ content = j->second.second;
+ return true;
+ }
+ }
+
+ return false;
+}
+
#ifdef BUILD_UNIT_TESTS
static void
============================================================
--- roster_delta.hh 25e389e2c42e9754480cb7f5e43157623eeaf1fa
+++ roster_delta.hh 85506c111153554e2184a41cae920e4198890bfd
@@ -25,7 +25,17 @@ apply_roster_delta(roster_delta const &
apply_roster_delta(roster_delta const & del,
roster_t & roster, marking_map & markings);
+bool
+try_get_markings_from_roster_delta(roster_delta const & del,
+ node_id const & nid,
+ marking_t & markings);
+// See the comment on this function's body for a description of its api.
+bool
+try_get_content_from_roster_delta(roster_delta const & del,
+ node_id const & nid,
+ file_id & content);
+
#ifdef BUILD_UNIT_TESTS
// instead of having elaborate tests here, we just export a function, and then
annotate.cc | 160 +++++++++++--------
database.cc | 141 ++++++++++++++++
database.hh | 14 +
roster_delta.cc | 80 +++++++++
roster_delta.hh | 10 +
tests/annotate_accidental_clean_merge/__driver__.lua | 64 +++++++
6 files changed, 404 insertions(+), 65 deletions(-)
pgp4Qxlxuoegk.pgp
Description: PGP signature