Logo Search packages:      
Sourcecode: tagcoll version File versions  Download package

CardinalityStore.cc

/*
 * Core tagged collection handling
 *
 * Copyright (C) 2003  Enrico Zini <enrico@debian.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#include <tagcoll/CardinalityStore.h>
#include <tagcoll/Patches.h>
#include <tagcoll/stringf.h>

#include <iterator>
#include <algorithm>
#include <vector>

//#define TRACE
//#define TRACE0
//#define TRACE1

#ifdef TRACE
#include <cstdio>
#include "stringf.h"
#endif

using namespace std;
using namespace Tagcoll;

template<class T>
static T mergeTags(const T& tag1, const T& tag2)
{
      // By default, just return the first tag
      return tag1;
}

template<>
static string mergeTags(const string& tag1, const string& tag2)
{
      // In case of strings, comma-separate them
      return tag1 + ", " + tag2;
}


template<class ITEM, class TAG>
00057 void CardinalityStore<ITEM, TAG>::applyChange(const PatchList<ITEM, TAG>& change)
{
      // Tagsets to be deleted because they became empty
      for (typename PatchList<ITEM, TAG>::const_iterator i = change.begin(); i != change.end(); i++)
      {
            OpSet<TAG> oldts = getTags(i->first);
            OpSet<TAG> newts = i->second.apply(oldts);

            // Remove the item from the old tagset
            typename tagsets_t::iterator oldi = tagsets.find(oldts);
            if (oldi != tagsets.end())
            {
                  oldi->second -= i->first;
                  if (oldi->second.empty())
                        tagsets.erase(oldi->first);

                  // Decrement the tag cardinalities accordingly
                  for (typename OpSet<TAG>::const_iterator j = oldts.begin();
                              j != oldts.end(); j++)
                        tags.del(*j, 1);
            }

            // Re-add the item with the new tagset
            if (!newts.empty())
                  consume(i->first, newts);
      }
}


template<class ITEM, class TAG>
void CardinalityStore<ITEM, TAG>::TagContainer::add(const TAG& tag, int card) throw ()
{
      typename TagContainer::iterator i = find(tag);
      if (i == TagContainer::end())
            insert(make_pair(tag, card));
      else
            i->second += card;
}

template<class ITEM, class TAG>
void CardinalityStore<ITEM, TAG>::TagContainer::del(const TAG& tag, int card) throw ()
{
      typename TagContainer::iterator i = find(tag);
      if (i != TagContainer::end())
            if (i->second > card)
                  i->second -= card;
            else
                  erase(tag);
}

template<class ITEM, class TAG>
00108 void CardinalityStore<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tagset)
{
      if (tagset.size() == 0)
            return;

      // Check if tagset is already present
      bool added;
      typename tagsets_t::iterator i = tagsets.find(tagset);
      if (i != tagsets.end())
      {
            // If it already exists, insert the item
            pair<typename OpSet<ITEM>::iterator, bool> r = i->second.insert(item);
            added = r.second;
      } else {
            // Else insert the item as a new set
            OpSet<ITEM> items;
            items += item;
            tagsets.insert(make_pair(tagset, items));
            added = true;
      }

      // Update tags accordingly
      if (added)
            for (typename OpSet<TAG>::const_iterator i = tagset.begin();
                        i != tagset.end(); i++)
                  tags.add(*i, 1);
}

template<class ITEM, class TAG>
00137 void CardinalityStore<ITEM, TAG>::consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tagset)
{
      if (tagset.size() == 0 || items.size() == 0)
            return;

      // Check if tagset is already present
      int inserted = 0;
      typename tagsets_t::iterator i = tagsets.find(tagset);
      if (i != tagsets.end())
      {
            // If it already exists, merge the items
            for (typename OpSet<ITEM>::const_iterator j = items.begin();
                        j != items.end(); j++)
            {
                  pair<typename OpSet<ITEM>::iterator, bool> r = i->second.insert(*j);
                  if (r.second)
                        inserted++;
            }
      } else {
            // Else insert them
            tagsets.insert(make_pair(tagset, items));
            inserted = items.size();
      }

      // Update tags accordingly
      for (typename OpSet<TAG>::const_iterator i = tagset.begin();
                  i != tagset.end(); i++)
            tags.add(*i, inserted);
}

template<class ITEM, class TAG>
00168 int CardinalityStore<ITEM, TAG>::itemCount() const
{
      int res = 0;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            res += t->second.size();

      return res;
}

template<class ITEM, class TAG>
00180 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getItemsExactMatch(const OpSet<TAG>& ts) const
{
      typename tagsets_t::const_iterator i = tagsets.find(ts);
      if (i == tagsets.end())
            return OpSet<ITEM>();
      else
            return i->second;
}

template<class ITEM, class TAG>
bool CardinalityStore<ITEM, TAG>::hasItem(const ITEM& item) const
{
      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->second.contains(item))
                  return true;
      return false;
}

template<class ITEM, class TAG>
00200 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getItemsHavingTag(const TAG& tag) const
{
      OpSet<ITEM> res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(tag))
                  res += t->second;

      return res;
}

template<class ITEM, class TAG>
00213 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getItemsHavingTags(const OpSet<TAG>& tags) const
{
      OpSet<ITEM> res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(tags))
                  res += t->second;

      return res;
}

template<class ITEM, class TAG>
00226 OpSet<TAG> CardinalityStore<ITEM, TAG>::getTagsOfItem(const ITEM& item) const
{
      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->second.contains(item))
                  return t->first;
      return OpSet<TAG>();
}

template<class ITEM, class TAG>
00236 OpSet<TAG> CardinalityStore<ITEM, TAG>::getTagsOfItems(const OpSet<ITEM>& items) const
{
      OpSet<TAG> res;
      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (!(t->second ^ items).empty())
                  res +=  t->first;
      return res;
}

template<class ITEM, class TAG>
00247 OpSet<TAG> CardinalityStore<ITEM, TAG>::getAllTags() const
{
      OpSet<TAG> res;

      for (typename TagContainer::const_iterator t = tags.begin();
                  t != tags.end(); t++)
            res += t->first;

      return res;
}

template<class ITEM, class TAG>
00259 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getTaggedItems() const
{
      OpSet<ITEM> res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            res += t->second;

      return res;
}

template<class ITEM, class TAG>
00271 OpSet<TAG> CardinalityStore<ITEM, TAG>::getCompanionTags(const OpSet<TAG>& ts) const
{
      OpSet<TAG> res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(ts))
                  res += t->first - ts;

      return res;
}

template<class ITEM, class TAG>
00284 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getCompanionItems(const OpSet<TAG>& ts) const
{
      OpSet<ITEM> res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(ts))
                  res += t->second;

      return res;
}

template<class ITEM, class TAG>
00297 map<ITEM, OpSet<TAG> > CardinalityStore<ITEM, TAG>::getCompanionItemsAndTagsets(const OpSet<TAG>& ts) const
{
      map<ITEM, OpSet<TAG> > res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(ts))
                  for (typename set<ITEM>::const_iterator i = t->second.begin();
                              i != t->second.end(); i++)
                        res.insert(make_pair(*i, t->first));

      return res;
}

template<class ITEM, class TAG>
00312 OpSet<ITEM> CardinalityStore<ITEM, TAG>::getRelatedItems(const OpSet<TAG>& ts, int maxdistance) const
{
      list< OpSet<TAG> > tagsets = getRelatedTagsets(ts, maxdistance);
      OpSet<ITEM> items;
      for (typename list< OpSet<TAG> >::const_iterator i = tagsets.begin(); i != tagsets.end(); i++)
            items += getItemsExactMatch(*i);
      return items;
}

template<class ITEM, class TAG>
00322 list< OpSet<TAG> > CardinalityStore<ITEM, TAG>::getRelatedTagsets(const OpSet<TAG>& ts, int maxdistance) const
{
      list< OpSet<TAG> > res;

      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
      {
            int dist = ts.distance(t->first);
            if (dist > 0 && dist <= maxdistance)
                  res.push_back(t->first);
      }

      return res;
}

template<class ITEM, class TAG>
00338 OpSet<TAG> CardinalityStore<ITEM, TAG>::getImplyingOneOf(const OpSet<TAG>& tags) const
{
      OpSet<TAG> allWith;
      OpSet<TAG> allWithout;

      for (typename tagsets_t::const_iterator ts = tagsets.begin();
                  ts != tagsets.end(); ts++)
      {
            OpSet<TAG> intersection = ts->first ^ tags;

            if (intersection.size() == 0)
                  allWithout += ts->first;
            else
                  allWith += ts->first;
      }

      return allWith - allWithout;
}

template<class ITEM, class TAG>
00358 OpSet<TAG> CardinalityStore<ITEM, TAG>::getImpliedBy(const TAG& tag) const
{
      // The tags implied by `tag' are the result of the intersection between all
      // tagsets that contain tag

      typename tagsets_t::const_iterator ts = tagsets.begin();
      // Find the first tagset that contains `tag'
      for ( ; ts != tagsets.end() && ts->first.find(tag) == ts->first.end(); ts++)
            ;
      if (ts == tagsets.end())
            return OpSet<TAG>();

      // Initialize the intersection set with it
      OpSet<TAG> res = ts->first;
      // Remove tag
      res -= tag;

      for (++ts; res.size() > 0 && ts != tagsets.end(); ts++)
            if (ts->first.find(tag) == ts->first.end())
                  continue;
            else
                  // For all tagsets that contain tag
                  // Intersect them with `res'
                  res = ts->first ^ res;

      return res;
}

template<class ITEM, class TAG>
00387 CardinalityStore<ITEM, TAG> CardinalityStore<ITEM, TAG>::getChildCollection(const TAG& tag) const
{
      CardinalityStore<ITEM, TAG> res;
      for (typename tagsets_t::const_iterator ts = tagsets.begin();
                  ts != tagsets.end(); ts++)
      {
            if (ts->first.find(tag) == ts->first.end())
                  continue;
            // for all tagsets ts having tag

            // Insert the tagset, without tag
            OpSet<TAG> newts = ts->first;
            newts.erase(tag);

            // Check if we make orphans, and preserve them
            if (newts.size() == 0 && ts->second.size() > 0)
                  res.consume(ts->second);
            else
                  res.consume(ts->second, newts);
      }
      return res;
}

template<class ITEM, class TAG>
00411 CardinalityStore<ITEM, TAG> CardinalityStore<ITEM, TAG>::getCollectionWithoutTags(const OpSet<TAG>& tags) const
{
      OpSet<TAG> candidates = getImplyingOneOf(tags);

      // remove all tags contained in `candidates', merging categories that
      // result with the same tagset;

      CardinalityStore<ITEM, TAG> res;
      for (typename tagsets_t::const_iterator ts = tagsets.begin();
                  ts != tagsets.end(); ts++)
      {
            OpSet<TAG> newts = ts->first - candidates;

            if (newts.size() > 0)
            {
                  // Insert the newfound tagset in the new collection
                  //res.add(newts, ts->second);
                  res.consume(ts->second, ts->first);
            }
      }

      return res;
}

template<class ITEM, class TAG>
00436 CardinalityStore<ITEM, TAG> CardinalityStore<ITEM, TAG>::getCollectionWithoutTagsetsHaving(const TAG& tag) const
{
      CardinalityStore<ITEM, TAG> res;
      for (typename tagsets_t::const_iterator ts = tagsets.begin();
                  ts != tagsets.end(); ts++)
      {
            if (ts->first.find(tag) != ts->first.end())
                  continue;
            // For all tagsets that do not contain tag

            // Insert the selected tagset in the new collection
            res.consume(ts->second, ts->first);
      }

      return res;
}

template<class ITEM, class TAG>
00454 CardinalityStore<ITEM, TAG> CardinalityStore<ITEM, TAG>::getCollectionWithoutTagsetsHavingAnyOf(const OpSet<TAG>& tags) const
{
      CardinalityStore<ITEM, TAG> res;
      for (typename tagsets_t::const_iterator ts = tagsets.begin();
                  ts != tagsets.end(); ts++)
      {
            OpSet<TAG> inters = ts->first ^ tags;
            if (!inters.empty())
                  continue;
            // For all tagsets that do not contain some of the tags in `tag`

            // Insert the selected tagset in the new collection
            res.consume(ts->second, ts->first);
      }

      return res;
}

template<class ITEM, class TAG>
00473 int CardinalityStore<ITEM, TAG>::getCardinality(const TAG& tag) const
{
      typename TagContainer::const_iterator found = tags.find(tag);
      if (found == tags.end())
            return 0;
      else
            return found->second;
}

template<class ITEM, class TAG>
TAG CardinalityStore<ITEM, TAG>::findTagWithMaxCardinalityNotIn(const OpSet<TAG>& ntags, int* card) const
{
      TAG candidate;
      int maxcard = 0;

      for (typename TagContainer::const_iterator i = tags.begin();
                  i != tags.end(); i++)
            if (!ntags.contains(i->first) && i->second > maxcard)
            {
                  maxcard = i->second;
                  candidate = i->first;
            }
      
      if (card)
            *card = maxcard;
      return candidate;
}

template<class ITEM, class TAG>
void CardinalityStore<ITEM, TAG>::mergeEquivalentTags()
{
//fprintf(stderr, "mergeEquivalentTags\n");
      // Build a card->tags mapping
      map<int, OpSet<TAG> > cards;
      for (typename TagContainer::const_iterator t = tags.begin();
                  t != tags.end(); t++)
            cards[t->second] += t->first;

      // Look for tags with the same cardinality
      map<TAG, TAG> renames;
      OpSet<TAG> items_to_rename;
      for (typename map<int, OpSet<TAG> >::const_iterator c = cards.begin();
                  c != cards.end(); c++)
            if (c->second.size() > 1)
            {
//fprintf(stderr, "Found a set of %d tags with card %d\n", c->second.size(), c->first);
                  
                  // Find out equivalent items
                  //
                  // Intersect every set of tags with the same cardinality
                  // with all tagsets in the collection which gives non-null
                  // intersection.  The tags in the resulting set are equivalent.
                  OpSet<TAG> equivs = c->second;
                  for (typename tagsets_t::const_iterator i = tagsets.begin();
                              i != tagsets.end() && equivs.size() > 1; i++)
                  {
                        OpSet<TAG> intersection = c->second ^ i->first;
                        if (!intersection.empty())
                              equivs = intersection;
                  }
//fprintf(stderr, "Final intersection gives %d nodes\n", equivs.size());

                  if (equivs.size() > 1)
                  {
                        // equivs contains a set of equivalent tags
      
                        // Schedule the renaming of equivalent tags into their merged
                        // tag

                        // Compute the merged name
                        typename OpSet<TAG>::const_iterator j = equivs.begin();
                        TAG merged = *j;
                        int card = tags[*j];
                        for (j++; j != equivs.end(); j++)
                              merged = mergeTags(merged, *j);

//string mname = stringf::fmt(merged);
//fprintf(stderr, "Merged name: %.*s\n", PFSTR(mname));

                        // Insert the merged item in `tags'
                        tags.insert(make_pair(merged, card));

                        // Store the renames to be done
                        // Remove the now-merged items from `tags'
                        for (typename OpSet<TAG>::const_iterator j = equivs.begin();
                                    j != equivs.end(); j++)
                        {
                              renames.insert(make_pair(*j, merged));
                              items_to_rename += *j;
                              tags.erase(*j);
                        }
                  }
            }
      
      if (renames.size() > 0)
      {
            // Perform the renames

            // Find the tagsets that need rename
            vector< OpSet<TAG> > to_rename;
            for (typename tagsets_t::const_iterator i = tagsets.begin(); i != tagsets.end(); i++)
            {
                  OpSet<TAG> inters = i->first ^ items_to_rename;
                  if (!inters.empty())
                        to_rename.push_back(i->first);
            }

            // Rename the tags inside the selected tagsets
            for (typename vector< OpSet<TAG> >::const_iterator i = to_rename.begin();
                        i != to_rename.end(); i++)
            {
                  // Store the old tag data and remove it from tagsets
                  typename tagsets_t::iterator t = tagsets.find(*i);
                  OpSet<TAG> tags = t->first;
                  OpSet<ITEM> items = t->second;
                  tagsets.erase(t);
                  
                  // Compute the renamed tagset
                  OpSet<TAG> newtags;
                  for (typename OpSet<TAG>::const_iterator r = tags.begin();
                              r != tags.end(); r++)
                  {
                        typename map<TAG, TAG>::const_iterator ren = renames.find(*r);
                        if (ren == renames.end())
                              newtags += *r;
                        else
                              newtags += ren->second;
                  }

                  // Insert the resulting tagset back into tagsets
                  tagsets.insert(make_pair(newtags, items));
            }
      }
}

template<class ITEM, class TAG>
void CardinalityStore<ITEM, TAG>::removeTagsWithCardinalityLessThan(int card)
{
      // Build the list of tags to remove
      OpSet<TAG> to_remove;
      for (typename TagContainer::const_iterator i = tags.begin();
                  i != tags.end(); i++)
            if (i->second < card)
                  to_remove += i->first;

      // Remove the tags from `tags'
      for (typename OpSet<TAG>::const_iterator i = to_remove.begin();
                  i != to_remove.end(); i++)
            tags.erase(*i);

      // Select the tagsets to be changed from `tagsets'
      vector< OpSet<TAG> > to_change;
      for (typename tagsets_t::const_iterator i = tagsets.begin();
                  i != tagsets.end(); i++)
      {
            OpSet<TAG> inters = i->first ^ to_remove;
            if (! inters.empty())
                  to_change.push_back(i->first);
      }

      // Remove the tags from `tagsets'
      for (typename vector< OpSet<TAG> >::const_iterator i = to_change.begin();
                  i != to_change.end(); i++)
      {
            // Remove the old mapping
            typename tagsets_t::iterator t = tagsets.find(*i);
            OpSet<ITEM> items = t->second;
            tagsets.erase(t);

            // Compute the new tagset
            OpSet<TAG> newts = *i - to_remove;

            // Insert the new mapping
            t = tagsets.find(newts);
            if (t == tagsets.end())
                  tagsets.insert(make_pair(newts, items));
            else
                  t->second += items;
      }
}

template<class ITEM, class TAG>
00655 void CardinalityStore<ITEM, TAG>::output(Consumer<ITEM, TAG>& cons) const
{
      for (typename tagsets_t::const_iterator i = tagsets.begin(); i != tagsets.end(); i++)
            cons.consume(i->second, i->first);
}

template<class ITEM, class TAG>
00662 void CardinalityStore<ITEM, TAG>::outputHavingTags(const OpSet<TAG>& ts, Consumer<ITEM, TAG>& consumer) const
{
      for (typename tagsets_t::const_iterator t = tagsets.begin();
                  t != tagsets.end(); t++)
            if (t->first.contains(ts))
                  consumer.consume(t->second, t->first);
}

#ifndef INSTANTIATING_TEMPLATES
#include <string>

00673 namespace Tagcoll {
      template class CardinalityStore<std::string, std::string>;
}
#endif


#ifdef COMPILE_TESTSUITE

#include <tests/test-utils.h>

namespace tut {
using namespace tut_tagcoll;

struct tagcoll_cardinalitystore_shar {
};
TESTGRP(tagcoll_cardinalitystore);

template<> template<>
void to::test<1>()
{
      CardinalityStore<string, string> coll;

      test_tagged_collection(coll);
}

}

#endif
// vim:set ts=4 sw=4:

Generated by  Doxygen 1.6.0   Back to index