// Logo Search packages:
// Sourcecode: tagcoll version File versions  Download package
//
// TextFormat.cc

/*
 * Serialize a tagged collection to a text file
 *
 * Copyright (C) 2003,2004,2005  Enrico Zini <enrico@debian.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#include <tagcoll/TextFormat.h>
#include <tagcoll/Patches.h>
#include <tagcoll/stringf.h>

#include <errno.h>

using namespace std;
using namespace stringf;
using namespace Tagcoll;

static void printTagset(const OpSet<string>& ts, FILE* out)
{
      for (OpSet<string>::const_iterator i = ts.begin();
                  i != ts.end(); i++)
            if (i == ts.begin())
            {
                  if (fprintf(out, "%.*s", PFSTR(*i)) < 0)
                        throw SystemException(errno, "writing tagset");
            }
            else
            {
                  if (fprintf(out, ", %.*s", PFSTR(*i)) < 0)
                        throw SystemException(errno, "writing tagset");
            }
}

template<class ITEM, class TAG>
00048 void TextFormat<ITEM, TAG>::consumeItemUntagged(const ITEM& item)
{
      string sitem = itemconv(item);
      if (fprintf(out, "%.*s\n", PFSTR(sitem)) < 0)
            throw SystemException(errno, "writing item");
}

template<class ITEM, class TAG>
00056 void TextFormat<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tags)
{
      string sitem = itemconv(item);
      if (fprintf(out, "%.*s: ", PFSTR(sitem)) < 0)
            throw SystemException(errno, "writing item");
      printTagset(tagconv(tags), out);
      if (fprintf(out, "\n") < 0)
            throw SystemException(errno, "writing newline after tagset");
}

template<class ITEM, class TAG>
00067 void TextFormat<ITEM, TAG>::consumeItemsUntagged(const OpSet<ITEM>& items)
{
      printTagset(itemconv(items), out);
      if (fprintf(out, "\n") < 0)
            throw SystemException(errno, "writing newline after items");
}

template<class ITEM, class TAG>
00075 void TextFormat<ITEM, TAG>::consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags)
{
      printTagset(itemconv(items), out);
      if (fprintf(out, ": ") < 0)
            throw SystemException(errno, "writing colon after items");
      printTagset(tagconv(tags), out);
      if (fprintf(out, "\n") < 0)
            throw SystemException(errno, "writing newline after tagset");
}


template<class ITEM, class TAG>
00087 void TextFormat<ITEM, TAG>::outputPatch(
            Converter<ITEM, std::string>& itemconv,
            Converter<TAG, std::string>& tagconv,
            const PatchList<ITEM, TAG>& patch,
            FILE* out)
{
      for (typename PatchList<ITEM, TAG>::const_iterator i = patch.begin();
                  i != patch.end(); i++)
      {
            string sitem = itemconv(i->first);
            if (fprintf(out, "%.*s: ", PFSTR(sitem)) < 0)
                  throw SystemException(errno, "writing item");

            OpSet<string> stags;
            for (typename OpSet<TAG>::const_iterator j = i->second.getAdded().begin();
                        j != i->second.getAdded().end(); j++)
                  stags += "+"+tagconv(*j);
            for (typename OpSet<TAG>::const_iterator j = i->second.getRemoved().begin();
                        j != i->second.getRemoved().end(); j++)
                  stags += "-"+tagconv(*j);

            printTagset(stags, out);
            if (fprintf(out, "\n") < 0)
                  throw SystemException(errno, "writing newline after tagset");
      }
}


// Parse an element
// Return the trailing separating char, that can be:
//  ParserInput::Eof
//  '\n'
//  ':'
//  ','
// Return the item in `item'
//
// Leading whitespace is skipped and whitespace before the separator is
// dropped; whitespace between two non-separator characters is kept as part
// of the item.  A ':' or ',' that is immediately followed by a non-blank
// character does not separate: it becomes part of the item (as in "c::D").
//
// Throws ParserException when an element starts with ':' or ',', when a run
// of two or more separator characters is followed by a blank, or when a ','
// separator is the last thing on the line.

// element: \s*[^ \t,:]\s*([,:])\s*
// or
// element: \s*[^ \t,:].*?[^ \t,:]\s*([,:])\s+
//
// NOTE(review): dynamic exception specifications such as the one below are
// deprecated since C++11 and were removed in C++17; they also turn any other
// exception type escaping this function into a call to std::unexpected().
static int parseElement(ParserInput& in, string& item) throw (ParserException)
{
      item = string();
      // Blanks and/or separator characters seen after the item so far; they
      // are appended to the item only if more item characters follow.
      string sep;
      int c;
      // Last separator character seen (':' or ','), or 0 if none is pending
      char sepchar = 0;
      // LSPACE: skipping leading blanks      ITEM: inside the item
      // ISPACE: blanks after item characters ISEP: separator chars after item
      // TSPACE: skipping blanks that trail an accepted separator
      enum {LSPACE, ITEM, ISPACE, ISEP, TSPACE} state = LSPACE;
      while ((c = in.nextChar()) != ParserInput::Eof)
      {
            // A newline ends the element in every state.  It is an error only
            // if the pending separator is ',' (a list cannot end with a
            // comma); a pending ':' is accepted and '\n' is what gets returned.
            if (c == '\n')
            {
                  if (sepchar && sepchar != ':')
                        throw ParserException("separator character ends the line");
                  else
                        return '\n';
            }
            switch (state)
            {
                  // Optional leading space
                  case LSPACE:
                        switch (c)
                        {
                              case ' ':
                              case '\t':
                                    break;
                              case ':':
                              case ',':
                                    throw ParserException("element cannot start with a separation character");
                                    break;
                              default:
                                    item += c;
                                    state = ITEM;
                                    break;
                        }
                        break;
                  // Non-separating characters
                  case ITEM:
                        switch (c)
                        {
                              case ' ':
                              case '\t':
                                    // Keep the blank aside: it belongs to the
                                    // item only if more item text follows
                                    sep += c;
                                    state = ISPACE;
                                    break;
                              case ':':
                              case ',':
                                    // Possible separator: ISEP decides whether
                                    // it really ends the item
                                    sepchar = c;
                                    sep += c;
                                    state = ISEP;
                                    break;
                              default:
                                    item += c;
                                    break;
                        }
                        break;
                  // Space inside item or at the end of item
                  case ISPACE:
                        switch (c)
                        {
                              case ' ':
                              case '\t':
                                    sep += c;
                                    break;
                              case ':':
                              case ',':
                                    // A separator preceded by blanks ends the
                                    // item at once; the pending blanks are dropped
                                    sepchar = c;
                                    state = TSPACE;
                                    break;
                              default:
                                    // The blanks were inside the item: keep them
                                    item += sep;
                                    item += c;
                                    sep = string();
                                    state = ITEM;
                                    break;
                        }
                        break;
                  // Separator inside item or at the end of item
                  case ISEP:
                        switch (c)
                        {
                              case ' ':
                              case '\t':
                                    // A blank confirms the separator, but only
                                    // a single separator character is allowed
                                    if (sep.size() > 1)
                                          throw ParserException("item is followed by more than one separator characters");
                                    state = TSPACE;
                                    break;
                              case ':':
                              case ',':
                                    sep += c;
                                    break;
                              default:
                                    // The separator characters were part of
                                    // the item (e.g. "c::D"): put them back
                                    item += sep;
                                    item += c;
                                    sepchar = 0;
                                    sep = string();
                                    state = ITEM;
                                    break;
                        }
                        break;
                  // Blanks trailing an accepted separator
                  case TSPACE:
                        switch (c)
                        {
                              case ' ':
                              case '\t':
                                    break;
                              default:
                                    // First character of the next element:
                                    // give it back and report the separator
                                    in.pushChar(c);
                                    return sepchar;
                        }
                        break;
            }
      }
      // Input ended: `item' holds whatever was accumulated so far
      return ParserInput::Eof;
}

// item1, item2, item3: tag1, tag2, tag3

//#define TRACE_PARSE
/**
 * Parse a whole collection in text format and feed it to `consumer`.
 *
 * Every input line has the form `item1, item2, item3: tag1, tag2, tag3`,
 * where the `: tags` part may be missing.  For each line that yields at
 * least one element, consumer.consume() is called once: with the item set
 * alone when there are no tags, with the item set and the tag set otherwise.
 *
 * @param in        input to read from, one element at a time
 * @param consumer  receives the parsed item sets and tag sets
 * @throws ParserException carrying the number of the offending line
 */
static void parseTextFormat(
            ParserInput& in,
            Consumer<string, string>& consumer)
{
      string item;

      OpSet<string> itemset;
      OpSet<string> tagset;
      int sep;
      enum {ITEMS, TAGS} state = ITEMS;
      int line = 1;
      do
      {
            try {
                  sep = parseElement(in, item);
            } catch (ParserException& e) {
                  // Add the line number and propagate.  A bare `throw'
                  // re-raises the very same exception object, so the line set
                  // here is kept and the exception is neither copied nor
                  // sliced to ParserException.
                  e.line(line);
                  throw;
            }

            // Elements before the `:' are items, elements after it are tags
            if (item.size() != 0)
            {
                  if (state == ITEMS)
                        itemset += item;
                  else
                        tagset += item;
            }

            // Line the current record was read from: `line' is advanced
            // below before the record is checked
            const int recordLine = line;

            switch (sep)
            {
                  case '\n':
                        line++;
                        // fall through: end of line also ends the record
                  case ParserInput::Eof:
                        if (!(itemset.empty() && tagset.empty()))
                        {
                              if (itemset.empty())
                                    throw ParserException(recordLine, "no elements before `:' separator");
                              if (tagset.empty())
                                    consumer.consume(itemset);
                              else
                                    consumer.consume(itemset, tagset);
                        }
                        // Start over for the next line
                        itemset.clear();
                        tagset.clear();
                        state = ITEMS;
                        break;
                  case ':':
                        if (state == TAGS)
                              throw ParserException(line, "separator `:' appears twice");
                        state = TAGS;
                        break;
                  default:
                        // ',': more elements of the same kind follow
                        break;
            }
      } while (sep != ParserInput::Eof);
}


template<class ITEM, class TAG>
00302 void TextFormat<ITEM, TAG>::parse(
            Converter<std::string, ITEM>& itemconv,
            Converter<std::string, TAG>& tagconv,
            ParserInput& in,
            Consumer<ITEM, TAG>& consumer)
{
      ConversionFilter<string, string, ITEM, TAG> conv(itemconv, tagconv, consumer);
      parseTextFormat(in, conv);
}

/**
 * Consumer of parsed string records that builds a PatchList out of them.
 *
 * Every tag string is expected to start with a sign: "+tag" requests the
 * addition of `tag', "-tag" its removal.  Tag strings starting with any other
 * character are ignored, and so are items and tags whose converted value
 * equals a default-constructed ITEM or TAG.  Records without tags are ignored.
 *
 * NOTE(review): the converters are stored by value as the base Converter
 * type; if callers pass objects of a derived type they are sliced here --
 * confirm this is intended.
 */
template<class ITEM, class TAG>
class PatchBuilder : public Consumer<string, string>
{
protected:
      PatchList<ITEM, TAG> patch;
      Converter<std::string, ITEM> itemconv;
      Converter<std::string, TAG> tagconv;

      // An item without tags carries no change: nothing to record
      virtual void consumeItemUntagged(const string& item) {}

      // Record the changes requested by `tags' as one patch for `item'
      virtual void consumeItem(const string& item, const OpSet<string>& tags)
      {
            ITEM target = itemconv(item);
            if (target == ITEM())
                  return;

            Patch<ITEM, TAG> change(target);
            for (OpSet<string>::const_iterator t = tags.begin(); t != tags.end(); ++t)
            {
                  const string& entry = *t;
                  // The tag name is what follows the sign character
                  TAG tag = tagconv(entry.substr(1));
                  if (tag != TAG())
                  {
                        switch (entry[0])
                        {
                              case '-':
                                    change.remove(tag);
                                    break;
                              case '+':
                                    change.add(tag);
                                    break;
                              default:
                                    break;
                        }
                  }
            }
            patch.addPatch(change);
      }

      // Items without tags carry no change: nothing to record
      virtual void consumeItemsUntagged(const OpSet<string>& items) {}

      // Record the changes requested by `tags' as one patch per item in `items'
      virtual void consumeItems(const OpSet<string>& items, const OpSet<string>& tags)
      {
            OpSet<TAG> toAdd;
            OpSet<TAG> toRemove;

            // Split the signed tag strings into additions and removals
            for (OpSet<string>::const_iterator t = tags.begin(); t != tags.end(); ++t)
            {
                  const string& entry = *t;
                  TAG tag = tagconv(entry.substr(1));
                  if (tag != TAG())
                  {
                        switch (entry[0])
                        {
                              case '-':
                                    toRemove += tag;
                                    break;
                              case '+':
                                    toAdd += tag;
                                    break;
                              default:
                                    break;
                        }
                  }
            }

            // The same additions and removals apply to every valid item
            for (OpSet<string>::const_iterator s = items.begin(); s != items.end(); ++s)
            {
                  ITEM target = itemconv(*s);
                  if (target != ITEM())
                        patch.addPatch(Patch<ITEM, TAG>(target, toAdd, toRemove));
            }
      }


public:
      // Build an empty patch list; both converters are copied into the builder
      PatchBuilder(
                  const Converter<std::string, ITEM>& itemconv,
                  const Converter<std::string, TAG>& tagconv)
            : itemconv(itemconv), tagconv(tagconv) {}
      virtual ~PatchBuilder() {}
      
      // Access the patches collected so far
      const PatchList<ITEM, TAG>& getPatch() const throw () { return patch; }
};

template<class ITEM, class TAG>
00378 PatchList<ITEM, TAG> TextFormat<ITEM, TAG>::parsePatch(
            Converter<std::string, ITEM>& itemconv,
            Converter<std::string, TAG>& tagconv,
            ParserInput& in)
{
      PatchBuilder<ITEM, TAG> builder(itemconv, tagconv);
      parseTextFormat(in, builder);
      return builder.getPatch();
}

// Explicitly instantiate TextFormat for string items and string tags in this
// translation unit, unless INSTANTIATING_TEMPLATES is defined.
// NOTE(review): presumably INSTANTIATING_TEMPLATES is defined by files that
// #include this one to instantiate other ITEM/TAG combinations -- confirm.
#ifndef INSTANTIATING_TEMPLATES
#include <string>

namespace Tagcoll {
      template class TextFormat<std::string, std::string>;
}
#endif


#ifdef COMPILE_TESTSUITE

#include <tests/test-utils.h>
#include <tagcoll/StringParserInput.h>

namespace tut {
using namespace tut_tagcoll;

// Data shared by the tests of this group: none is needed
struct tagcoll_textformat_shar {
};
// NOTE(review): TESTGRP comes from tests/test-utils.h, which is not visible
// here; presumably it declares the test group and the `to' type whose
// test<N>() members are specialised below -- confirm.
TESTGRP(tagcoll_textformat);

// Parse a small collection and check the item and tag counts that the
// test consumer reports
template<> template<>
void to::test<1>()
{
      // Includes lines with no tags after the colon and tags containing "::"
      StringParserInput input(
                  "a: b, c\n"
                  "b:\n"
                  "c: \n"
                  "d:  c::D, e::F,    f::g\n"
      );

      TestConsumer<string, string> sink;

      Converter<string, string> conv;
      TextFormat<string, string>::parse(conv, conv, input, sink);

      ensure_equals(sink.items, 4);
      ensure_equals(sink.tags, 5);
}

// Parse a patch and check which items end up in the patch list, and with
// how many added and removed tags
template<> template<>
void to::test<2>()
{
      // "b" has no tags and the only tag of "c" carries no +/- sign
      StringParserInput input(
                  "a: +b, +c\n"
                  "b:\n"
                  "c: foo\n"
                  "d:  +c::D, -e::F,    -f::g\n"
      );

      Converter<string, string> conv;
      PatchList<string, string> patches(TextFormat<string, string>::parsePatch(conv, conv, input));

      /*
      cerr << "Patchlist[" << patches.size() << "]:" << endl;
      for (PatchList<string, string>::const_iterator p = patches.begin();
                  p != patches.end(); p++)
            cerr << " " << p->first << ": " <<
                  p->second.getAdded().size()  << "x" <<
                  p->second.getRemoved().size()  << endl;
      */

      // Only "a" and "d" are expected in the patch list
      ensure_equals(patches.size(), 2);
      gen_ensure(patches.find("a") != patches.end());
      gen_ensure(patches.find("b") == patches.end());
      gen_ensure(patches.find("c") == patches.end());
      gen_ensure(patches.find("d") != patches.end());

      // "a": two additions, no removals
      PatchList<string, string>::const_iterator forA = patches.find("a");
      ensure_equals(forA->second.getAdded().size(), 2);
      ensure_equals(forA->second.getRemoved().size(), 0);

      // "d": one addition, two removals
      PatchList<string, string>::const_iterator forD = patches.find("d");
      ensure_equals(forD->second.getAdded().size(), 1);
      ensure_equals(forD->second.getRemoved().size(), 2);
}

}

#endif

// vim:set ts=4 sw=4:

// Generated by  Doxygen 1.6.0   Back to index