Skip to Content.
Sympa Menu

overpass - Re: [overpass] Segmentation fault

Subject: Overpass API developpement

List archive

Re: [overpass] Segmentation fault


Chronological Thread 
  • From: Roland Olbricht <>
  • To: , Igor Brejc <>
  • Subject: Re: [overpass] Segmentation fault
  • Date: Mon, 31 Jul 2017 10:41:00 +0200

Hi,

I've forgotten the files.

Best regards,

Roland
/** Copyright 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Roland Olbricht et al.
 *
 * This file is part of Overpass_API.
 *
 * Overpass_API is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * Overpass_API is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with Overpass_API.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "node_updater.h"
#include "osm_updater.h"
#include "relation_updater.h"
#include "tags_updater.h"
#include "way_updater.h"
#include "../../expat/expat_justparse_interface.h"
#include "../../template_db/dispatcher_client.h"
#include "../../template_db/random_file.h"
#include "../../template_db/transaction.h"
#include "../core/settings.h"
#include "../data/abstract_processing.h"
#include "../data/collect_members.h"
#include "../dispatch/resource_manager.h"
#include "../frontend/output.h"

#include <dirent.h>
#include <sys/types.h>
#include <unistd.h>

#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <sstream>


/**
 * Tests the library node_updater, way_updater and relation_updater
 * with a sample OSM file
 */

namespace
{
  // State shared by the XML callbacks in this file. The pointers start out
  // as null and are assigned by the Osm_Updater constructors.

  // Updater that receives parsed nodes, and the node currently being assembled.
  Node_Updater* node_updater(0);
  Node current_node;
  // Updater that receives parsed ways, and the way currently being assembled.
  Way_Updater* way_updater(0);
  Way current_way;
  // Updater that receives parsed relations.
  Relation_Updater* relation_updater(0);
  // Assigned only by the dispatcher-based Osm_Updater constructor; read by the
  // DEBUG index size checks.
  Transaction* transaction_(0);
  // The relation currently being assembled.
  Relation current_relation;
  // Section of the input reached so far: 0 (nothing seen yet), IN_NODES,
  // IN_WAYS or IN_RELATIONS.
  int state;
  const int IN_NODES = 1;
  const int IN_WAYS = 2;
  const int IN_RELATIONS = 3;
  // Set to DELETE while inside a <delete> element, 0 otherwise.
  int modify_mode = 0;
  const int DELETE = 1;
  // Once osm_element_count reaches this value, the *_end handlers trigger an
  // intermediate update. Overwritten by the Osm_Updater constructors.
  uint flush_limit = 4*1024*1024;
  // Metadata of the element currently being parsed. Stays null unless a
  // constructor is called with a nonzero meta mode; all handlers test it
  // before use.
  OSM_Element_Metadata* meta;

  // Incremented for every closing element seen by the end callbacks and reset
  // to zero after each intermediate update.
  uint32 osm_element_count;
  Osm_Backend_Callback* callback(0);
  // Allocated only by the dispatcher-based Osm_Updater constructor.
  Cpu_Stopwatch* cpu_stopwatch(0);

  // Copy of the data version passed to the constructor; not read in this file.
  std::string data_version;

  /**
   * Handles a <tag> element: reads its "k" and "v" attributes and appends the
   * resulting pair to the element that is currently open. The node is
   * checked first, then the way, then the relation; if none of them has a
   * positive id, the tag is dropped.
   *
   * @param attr  null-terminated array of alternating attribute names and values
   */
  inline void tag_start(const char **attr)
  {
    std::string tag_key, tag_value;
    for (const char** it = attr; *it; it += 2)
    {
      if (strcmp(it[0], "k") == 0)
        tag_key = it[1];
      if (strcmp(it[0], "v") == 0)
        tag_value = it[1];
    }

    const std::pair< std::string, std::string > tag(tag_key, tag_value);
    if (current_node.id.val() > 0)
      current_node.tags.push_back(tag);
    else if (current_way.id.val() > 0)
      current_way.tags.push_back(tag);
    else if (current_relation.id.val() > 0)
      current_relation.tags.push_back(tag);
  }


  inline void nd_start(const char **attr)
  {
    if (current_way.id.val() > 0)
    {
      Uint64 ref;
      for (unsigned int i(0); attr[i]; i += 2)
      {
	if (!strcmp(attr[i], "ref"))
	  ref = atoll(attr[i+1]);
      }
      current_way.nds.push_back(ref);
    }
  }


  inline void member_start(const char **attr)
  {
    if (current_relation.id.val() > 0)
    {
      Uint64 ref;
      std::string type, role;
      for (unsigned int i(0); attr[i]; i += 2)
      {
	if (!strcmp(attr[i], "ref"))
	  ref = atoll(attr[i+1]);
	if (!strcmp(attr[i], "type"))
	  type = attr[i+1];
	if (!strcmp(attr[i], "role"))
	  role = attr[i+1];
      }
      Relation_Entry entry;
      entry.ref = ref;
      if (type == "node")
	entry.type = Relation_Entry::NODE;
      else if (type == "way")
	entry.type = Relation_Entry::WAY;
      else if (type == "relation")
	entry.type = Relation_Entry::RELATION;
      entry.role = relation_updater->get_role_id(role);
      current_relation.members.push_back(entry);
    }
  }


  inline void node_start(const char **attr)
  {
    if (state == 0)
      state = IN_NODES;
    if (meta)
      *meta = OSM_Element_Metadata();

    Node::Id_Type id;
    double lat(100.0), lon(200.0);
    for (unsigned int i(0); attr[i]; i += 2)
    {
      if (!strcmp(attr[i], "id"))
	id = atoll(attr[i+1]);
      if (!strcmp(attr[i], "lat"))
	lat = atof(attr[i+1]);
      if (!strcmp(attr[i], "lon"))
	lon = atof(attr[i+1]);
      if (meta && (!strcmp(attr[i], "version")))
	meta->version = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "timestamp")))
      {
        meta->timestamp = Timestamp(
            atol(attr[i+1]), //year
            atoi(attr[i+1]+5), //month
            atoi(attr[i+1]+8), //day
            atoi(attr[i+1]+11), //hour
            atoi(attr[i+1]+14), //minute
            atoi(attr[i+1]+17) //second
            ).timestamp;
      }
      if (meta && (!strcmp(attr[i], "changeset")))
	meta->changeset = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "user")))
	meta->user_name = attr[i+1];
      if (meta && (!strcmp(attr[i], "uid")))
	meta->user_id = atoi(attr[i+1]);
    }
    if (lat >= -90. && lat <= 90. && lon >= -180. && lon <= 180.)
      current_node = Node(id, lat, lon);
    else
      current_node = Node(id, 100., 200.);
  }


  /**
   * Handles a closing </node> element: hands the current node to the node
   * updater, either as a deletion (inside <delete>) or as a new version.
   * Triggers an intermediate update once flush_limit elements have been
   * counted, and finally resets the id of current_node.
   */
  inline void node_end()
  {
    if (modify_mode != DELETE)
      node_updater->set_node(current_node, meta);
    else
      node_updater->set_id_deleted(current_node.id, meta);

    const bool flush_due = (osm_element_count >= flush_limit);
    if (flush_due)
    {
      callback->node_elapsed(current_node.id);
      node_updater->update(callback, cpu_stopwatch, true);
      callback->parser_started();
      osm_element_count = 0;
    }

    current_node.id = Node::Id_Type();
  }


  inline void way_start(const char **attr)
  {
    if (state == IN_NODES)
    {
      callback->nodes_finished();
      node_updater->update(callback, cpu_stopwatch, false);
  {
    File_Blocks_Index_Base* index = transaction_->data_index(osm_base_settings().NODES);
    ((File_Blocks_Index< Uint32_Index >*)index)->DEBUG_check_total_index_size();
  }
      //way_updater->update_moved_idxs(callback, node_updater->get_moved_nodes(), update_way_logger);
      callback->parser_started();
      osm_element_count = 0;
      state = IN_WAYS;
    }
    else if (state == 0)
      state = IN_WAYS;
    if (meta)
      *meta = OSM_Element_Metadata();

    Way::Id_Type id;
    for (unsigned int i(0); attr[i]; i += 2)
    {
      if (!strcmp(attr[i], "id"))
	id = atoll(attr[i+1]);
      if (meta && (!strcmp(attr[i], "version")))
	meta->version = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "timestamp")))
      {
	meta->timestamp = 0;
	meta->timestamp |= (atoll(attr[i+1])<<26); //year
	meta->timestamp |= (atoi(attr[i+1]+5)<<22); //month
	meta->timestamp |= (atoi(attr[i+1]+8)<<17); //day
	meta->timestamp |= (atoi(attr[i+1]+11)<<12); //hour
	meta->timestamp |= (atoi(attr[i+1]+14)<<6); //minute
	meta->timestamp |= atoi(attr[i+1]+17); //second
      }
      if (meta && (!strcmp(attr[i], "changeset")))
	meta->changeset = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "user")))
	meta->user_name = attr[i+1];
      if (meta && (!strcmp(attr[i], "uid")))
	meta->user_id = atoi(attr[i+1]);
    }
    current_way = Way(id.val());
  }


  /**
   * Handles a closing </way> element: hands the current way to the way
   * updater, either as a deletion (inside <delete>) or as a new version.
   * Triggers an intermediate update once flush_limit elements have been
   * counted, and finally resets the id of current_way.
   */
  inline void way_end()
  {
    if (modify_mode != DELETE)
      way_updater->set_way(current_way, meta);
    else
      way_updater->set_id_deleted(current_way.id, meta);

    const bool flush_due = (osm_element_count >= flush_limit);
    if (flush_due)
    {
      callback->way_elapsed(current_way.id);
      way_updater->update(
          callback, cpu_stopwatch, true,
          node_updater->get_new_skeletons(),
          node_updater->get_attic_skeletons(),
          node_updater->get_new_attic_skeletons());
      callback->parser_started();
      osm_element_count = 0;
    }

    current_way.id = 0u;
  }


  /**
   * Handles a closing </relation> element: hands the current relation to the
   * relation updater, either as a deletion (inside <delete>) or as a new
   * version. Triggers an intermediate update once flush_limit elements have
   * been counted, and finally resets the id of current_relation.
   */
  inline void relation_end()
  {
    if (modify_mode != DELETE)
      relation_updater->set_relation(current_relation, meta);
    else
      relation_updater->set_id_deleted(current_relation.id, meta);

    const bool flush_due = (osm_element_count >= flush_limit);
    if (flush_due)
    {
      callback->relation_elapsed(current_relation.id);
      relation_updater->update(
          callback, cpu_stopwatch,
          node_updater->get_new_skeletons(),
          node_updater->get_attic_skeletons(),
          node_updater->get_new_attic_skeletons(),
          way_updater->get_new_skeletons(),
          way_updater->get_attic_skeletons(),
          way_updater->get_new_attic_skeletons());
      callback->parser_started();
      osm_element_count = 0;
    }

    current_relation.id = 0u;
  }


  inline void relation_start(const char **attr)
  {
    if (state == IN_NODES)
    {
      callback->nodes_finished();
      node_updater->update(callback, cpu_stopwatch, false);
//       relation_updater->update_moved_idxs
//           (node_updater->get_moved_nodes(), way_updater->get_moved_ways(), update_relation_logger);
      callback->parser_started();
      osm_element_count = 0;
      state = IN_RELATIONS;
    }
    else if (state == IN_WAYS)
    {
      callback->ways_finished();
      way_updater->update(callback, cpu_stopwatch, false,
                          node_updater->get_new_skeletons(), node_updater->get_attic_skeletons(),
                          node_updater->get_new_attic_skeletons());
//       relation_updater->update_moved_idxs
//           (node_updater->get_moved_nodes(), way_updater->get_moved_ways(), update_relation_logger);
  {
    File_Blocks_Index_Base* index = transaction_->data_index(osm_base_settings().NODES);
    ((File_Blocks_Index< Uint32_Index >*)index)->DEBUG_check_total_index_size();
  }
      callback->parser_started();
      osm_element_count = 0;
      state = IN_RELATIONS;
    }
    else if (state == 0)
      state = IN_RELATIONS;
    if (meta)
      *meta = OSM_Element_Metadata();

    Relation::Id_Type id;
    for (unsigned int i(0); attr[i]; i += 2)
    {
      if (!strcmp(attr[i], "id"))
	id = atoll(attr[i+1]);
      if (meta && (!strcmp(attr[i], "version")))
	meta->version = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "timestamp")))
      {
	meta->timestamp = 0;
	meta->timestamp |= (atoll(attr[i+1])<<26); //year
	meta->timestamp |= (atoi(attr[i+1]+5)<<22); //month
	meta->timestamp |= (atoi(attr[i+1]+8)<<17); //day
	meta->timestamp |= (atoi(attr[i+1]+11)<<12); //hour
	meta->timestamp |= (atoi(attr[i+1]+14)<<6); //minute
	meta->timestamp |= atoi(attr[i+1]+17); //second
      }
      if (meta && (!strcmp(attr[i], "changeset")))
	meta->changeset = atoi(attr[i+1]);
      if (meta && (!strcmp(attr[i], "user")))
	meta->user_name = attr[i+1];
      if (meta && (!strcmp(attr[i], "uid")))
	meta->user_id = atoi(attr[i+1]);
    }
    current_relation = Relation(id.val());
  }
}


/**
 * Start-element callback for a nodes-only pass: reacts to <tag>, <node> and
 * <delete> and ignores every other element.
 */
void node_start(const char *el, const char **attr)
{
  if (strcmp(el, "tag") == 0)
  {
    tag_start(attr);
    return;
  }
  if (strcmp(el, "node") == 0)
  {
    node_start(attr);
    return;
  }
  if (strcmp(el, "delete") == 0)
    modify_mode = DELETE;
}

/**
 * End-element callback for a nodes-only pass: finishes a node on </node>,
 * leaves delete mode on </delete>, and counts every closing element.
 */
void node_end(const char *el)
{
  const bool closes_node = (strcmp(el, "node") == 0);
  if (closes_node)
    node_end();
  else if (strcmp(el, "delete") == 0)
    modify_mode = 0;

  // The counter is advanced only after the element has been handled.
  ++osm_element_count;
}

/**
 * Start-element callback for a ways-only pass: reacts to <tag>, <nd>, <way>
 * and <delete> and ignores every other element.
 */
void way_start(const char *el, const char **attr)
{
  if (strcmp(el, "tag") == 0)
  {
    tag_start(attr);
    return;
  }
  if (strcmp(el, "nd") == 0)
  {
    nd_start(attr);
    return;
  }
  if (strcmp(el, "way") == 0)
  {
    way_start(attr);
    return;
  }
  if (strcmp(el, "delete") == 0)
    modify_mode = DELETE;
}

/**
 * End-element callback for a ways-only pass: finishes a way on </way>,
 * leaves delete mode on </delete>, and counts every closing element.
 */
void way_end(const char *el)
{
  const bool closes_way = (strcmp(el, "way") == 0);
  if (closes_way)
    way_end();
  else if (strcmp(el, "delete") == 0)
    modify_mode = 0;

  // The counter is advanced only after the element has been handled.
  ++osm_element_count;
}

/**
 * Start-element callback for a relations-only pass: reacts to <tag>,
 * <member>, <relation> and <delete> and ignores every other element.
 */
void relation_start(const char *el, const char **attr)
{
  if (strcmp(el, "tag") == 0)
  {
    tag_start(attr);
    return;
  }
  if (strcmp(el, "member") == 0)
  {
    member_start(attr);
    return;
  }
  if (strcmp(el, "relation") == 0)
  {
    relation_start(attr);
    return;
  }
  if (strcmp(el, "delete") == 0)
    modify_mode = DELETE;
}

/**
 * End-element callback for a relations-only pass: finishes a relation on
 * </relation>, leaves delete mode on </delete>, and counts every closing
 * element.
 */
void relation_end(const char *el)
{
  const bool closes_relation = (strcmp(el, "relation") == 0);
  if (closes_relation)
    relation_end();
  else if (strcmp(el, "delete") == 0)
    modify_mode = 0;

  // The counter is advanced only after the element has been handled.
  ++osm_element_count;
}

/**
 * Start-element callback for a complete pass over a file: dispatches <tag>,
 * <nd>, <member>, <node>, <way> and <relation> to their handlers, enters
 * delete mode on <delete>, and ignores every other element.
 */
void start(const char *el, const char **attr)
{
  if (strcmp(el, "tag") == 0)
  {
    tag_start(attr);
    return;
  }
  if (strcmp(el, "nd") == 0)
  {
    nd_start(attr);
    return;
  }
  if (strcmp(el, "member") == 0)
  {
    member_start(attr);
    return;
  }
  if (strcmp(el, "node") == 0)
  {
    node_start(attr);
    return;
  }
  if (strcmp(el, "way") == 0)
  {
    way_start(attr);
    return;
  }
  if (strcmp(el, "relation") == 0)
  {
    relation_start(attr);
    return;
  }
  if (strcmp(el, "delete") == 0)
    modify_mode = DELETE;
}

/**
 * End-element callback for a complete pass over a file: finishes the node,
 * way or relation being closed, leaves delete mode on </delete>, and counts
 * every closing element.
 */
void end(const char *el)
{
  const bool closes_node = (strcmp(el, "node") == 0);
  const bool closes_way = !closes_node && (strcmp(el, "way") == 0);
  const bool closes_relation = !closes_node && !closes_way && (strcmp(el, "relation") == 0);

  if (closes_node)
    node_end();
  else if (closes_way)
    way_end();
  else if (closes_relation)
    relation_end();
  else if (strcmp(el, "delete") == 0)
    modify_mode = 0;

  // The counter is advanced only after the element has been handled.
  ++osm_element_count;
}


// Bit flags for change evaluation. Every constant occupies a bit of its own,
// so any combination of them can be stored in a single int.
const int TAGS                = 1<<0;
const int GEOMETRY            = 1<<1;
const int MEMBERS             = 1<<2;
const int WAY_MEMBERSHIP      = 1<<3;
const int RELATION_MEMBERSHIP = 1<<4;
const int INDIRECT_MEMBERSHIP = 1<<5;
const int MEMBER_PROPERTIES   = 1<<6;


/**
 * Completes a parser run: notifies the callback that the current section is
 * finished, runs the outstanding updates for nodes, ways and relations in
 * that order (starting with the section the parser stopped in), flushes the
 * updaters and finally reports success to the callback.
 */
void Osm_Updater::finish_updater()
{
  // Debug output of the node index size. transaction_ is only assigned by
  // the dispatcher-based constructor, so it can be null here; skip the
  // diagnostics instead of dereferencing a null pointer.
  if (transaction_)
  {
    File_Blocks_Index_Base* index = transaction_->data_index(osm_base_settings().NODES);
    if (index)
      ((File_Blocks_Index< Uint32_Index >*)index)->DEBUG_check_total_index_size();
  }
  if (state == IN_NODES)
    callback->nodes_finished();
  else if (state == IN_WAYS)
    callback->ways_finished();
  else if (state == IN_RELATIONS)
    callback->relations_finished();

  // The three steps below cascade on purpose: each one advances state so
  // that the following step runs as well.
  if (state == IN_NODES)
  {
    node_updater->update(callback, cpu_stopwatch, false);
    //way_updater->update_moved_idxs(callback, node_updater->get_moved_nodes(), update_way_logger);
    state = IN_WAYS;
  }
  if (state == IN_WAYS)
  {
    way_updater->update(callback, cpu_stopwatch, false,
                        node_updater->get_new_skeletons(), node_updater->get_attic_skeletons(),
                        node_updater->get_new_attic_skeletons());
//     relation_updater->update_moved_idxs
//         (node_updater->get_moved_nodes(), way_updater->get_moved_ways(), update_relation_logger);
    state = IN_RELATIONS;
  }
  if (state == IN_RELATIONS)
    relation_updater->update(callback, cpu_stopwatch,
                          node_updater->get_new_skeletons(), node_updater->get_attic_skeletons(),
                          node_updater->get_new_attic_skeletons(),
                          way_updater->get_new_skeletons(), way_updater->get_attic_skeletons(),
                          way_updater->get_new_attic_skeletons());

  // Same debug output as above, after all updates have run.
  if (transaction_)
  {
    File_Blocks_Index_Base* index = transaction_->data_index(osm_base_settings().NODES);
    if (index)
      ((File_Blocks_Index< Uint32_Index >*)index)->DEBUG_check_total_index_size();
  }
  flush();
  callback->parser_succeeded();
}

/**
 * Parses the complete OSM XML stream and applies all resulting updates.
 *
 * @param in  stream to read the XML from. It is passed on to the parser, so
 *            a caller that hands in a stream other than stdin gets that
 *            stream parsed rather than stdin.
 */
void Osm_Updater::parse_file_completely(FILE* in)
{
  callback->parser_started();
  parse(in, start, end);

  finish_updater();
}

// Parses the stream in with the node-specific callbacks node_start and
// node_end; way and relation elements are ignored by these callbacks.
void parse_nodes_only(FILE* in)
{
  parse(in, node_start, node_end);
}

// Parses the stream in with the way-specific callbacks way_start and
// way_end; node and relation elements are ignored by these callbacks.
void parse_ways_only(FILE* in)
{
  parse(in, way_start, way_end);
}

// Parses the stream in with the relation-specific callbacks relation_start
// and relation_end; node and way elements are ignored by these callbacks.
void parse_relations_only(FILE* in)
{
  parse(in, relation_start, relation_end);
}

/**
 * Creates an updater that writes through a running dispatcher.
 *
 * Connects to the dispatcher, requests write access (logging before and
 * after the request), opens a transaction on the dispatcher's database
 * directory, writes the data version to "osm_base_version.shadow", creates
 * the three element updaters on that transaction and initialises the parser
 * state shared with the XML callbacks.
 *
 * @param callback_      receiver of progress notifications
 * @param data_version_  version string written to the shadow version file
 * @param meta_          metadata mode handed to the element updaters
 * @param flush_limit_   number of elements after which an intermediate
 *                       update is triggered
 */
Osm_Updater::Osm_Updater(Osm_Backend_Callback* callback_, const std::string& data_version_,
    meta_modes meta_, unsigned int flush_limit_)
  : dispatcher_client(0), meta(meta_)
{
  dispatcher_client = new Dispatcher_Client(osm_base_settings().shared_name);

  Logger logger(dispatcher_client->get_db_dir());
  logger.annotated_log("write_start() start version='" + data_version_ + '\'');
  dispatcher_client->write_start();
  logger.annotated_log("write_start() end");

  transaction = new Nonsynced_Transaction
      (true, true, dispatcher_client->get_db_dir(), "");

  {
    // The stream is closed at the end of this scope, before the updaters
    // are created.
    const std::string shadow_version_file
        = dispatcher_client->get_db_dir() + "osm_base_version.shadow";
    std::ofstream version_out(shadow_version_file.c_str());
    version_out<<data_version_<<'\n';
  }

  transaction_ = transaction;

  node_updater_ = new Node_Updater(*transaction, meta);
  way_updater_ = new Way_Updater(*transaction, meta);
  relation_updater_ = new Relation_Updater(*transaction, meta);

  // Publish everything the XML callbacks need.
  node_updater = node_updater_;
  way_updater = way_updater_;
  relation_updater = relation_updater_;
  callback = callback_;
  flush_limit = flush_limit_;
  data_version = data_version_;
  state = 0;
  osm_element_count = 0;

  cpu_stopwatch = new Cpu_Stopwatch();
  cpu_stopwatch->start_cpu_timer(0);

  if (meta)
    ::meta = new OSM_Element_Metadata();

  // Debug output of the node index size right after start-up.
  File_Blocks_Index_Base* node_index = transaction_->data_index(osm_base_settings().NODES);
  ((File_Blocks_Index< Uint32_Index >*)node_index)->DEBUG_check_total_index_size();
}

/**
 * Creates an updater that writes directly into the database directory
 * db_dir, without a dispatcher.
 *
 * Refuses to start if the dispatcher's shared-name file exists in db_dir.
 * Otherwise writes the data version to "osm_base_version", creates the three
 * element updaters on db_dir and initialises the parser state shared with
 * the XML callbacks.
 *
 * @param callback_      receiver of progress notifications
 * @param db_dir         database directory; used as a plain prefix for file names
 * @param data_version_  version string written to the version file
 * @param meta_          metadata mode handed to the element updaters
 * @param flush_limit_   number of elements after which an intermediate
 *                       update is triggered
 * @throws Context_Error if the dispatcher's shared-name file is present
 */
Osm_Updater::Osm_Updater
    (Osm_Backend_Callback* callback_, std::string db_dir, const std::string& data_version_,
     meta_modes meta_, unsigned int flush_limit_)
  : transaction(0), dispatcher_client(0), db_dir_(db_dir), meta(meta_)
{
  const std::string dispatcher_marker = db_dir + osm_base_settings().shared_name;
  if (file_present(dispatcher_marker))
    throw Context_Error("File " + dispatcher_marker + " present, "
        "which indicates a running dispatcher. Delete file if no dispatcher is running.");

  {
    // The stream is closed at the end of this scope, before the updaters
    // are created.
    const std::string version_file = db_dir + "osm_base_version";
    std::ofstream version_out(version_file.c_str());
    version_out<<data_version_<<'\n';
  }

  node_updater_ = new Node_Updater(db_dir, meta);
  way_updater_ = new Way_Updater(db_dir, meta);
  relation_updater_ = new Relation_Updater(db_dir, meta);

  // Publish everything the XML callbacks need.
  node_updater = node_updater_;
  way_updater = way_updater_;
  relation_updater = relation_updater_;
  callback = callback_;
  flush_limit = flush_limit_;
  data_version = data_version_;
  state = 0;
  osm_element_count = 0;

  if (meta)
    ::meta = new OSM_Element_Metadata();
}

void Osm_Updater::flush()
{
  delete node_updater_;
  node_updater_ = new Node_Updater(db_dir_, meta ? keep_meta : only_data);
  delete way_updater_;
  way_updater_ = new Way_Updater(db_dir_, meta);
  delete relation_updater_;
  relation_updater_ = new Relation_Updater(db_dir_, meta);
  if (cpu_stopwatch)
    cpu_stopwatch->stop_cpu_timer(0);
  std::vector< uint64 > cpu_runtime = cpu_stopwatch ? cpu_stopwatch->cpu_time() : std::vector< uint64 >();

  if (dispatcher_client)
  {
    delete transaction;
    transaction = 0;
    
    Logger logger(dispatcher_client->get_db_dir());
    std::ostringstream out;
    out<<"write_commit() start "<<global_read_counter();
    for (std::vector< uint64 >::const_iterator it = cpu_runtime.begin(); it != cpu_runtime.end(); ++it)
      out<<' '<<*it;
    logger.annotated_log(out.str());
    
    dispatcher_client->write_commit();
    rename((dispatcher_client->get_db_dir() + "osm_base_version.shadow").c_str(),
	   (dispatcher_client->get_db_dir() + "osm_base_version").c_str());
    
    logger.annotated_log("write_commit() end");
    delete dispatcher_client;
    dispatcher_client = 0;
  }
}

/**
 * Releases the element updaters, the metadata buffer and the CPU stopwatch.
 * If the dispatcher connection is still open (i.e. no commit has taken
 * place in flush()), the transaction is closed and the write is rolled back,
 * logged before and after.
 *
 * All namespace-level pointers that refer to objects destroyed here are
 * reset to null, so that the XML callbacks and a later Osm_Updater instance
 * never see a stale pointer.
 */
Osm_Updater::~Osm_Updater()
{
  if (node_updater == node_updater_)
    node_updater = 0;
  if (way_updater == way_updater_)
    way_updater = 0;
  if (relation_updater == relation_updater_)
    relation_updater = 0;

  delete node_updater_;
  delete way_updater_;
  delete relation_updater_;

  // The callbacks test ::meta before every use, so it must not keep the
  // address of the freed buffer.
  delete ::meta;
  ::meta = 0;

  // Allocated by the dispatcher-based constructor and owned by nobody else.
  delete cpu_stopwatch;
  cpu_stopwatch = 0;

  if (dispatcher_client)
  {
    if (transaction_ == transaction)
      transaction_ = 0;
    delete transaction;
    Logger logger(dispatcher_client->get_db_dir());
    logger.annotated_log("write_rollback() start");
    dispatcher_client->write_rollback();
    logger.annotated_log("write_rollback() end");
    delete dispatcher_client;
  }
}
/** Copyright 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Roland Olbricht et al.
 *
 * This file is part of Template_DB.
 *
 * Template_DB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * Template_DB is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with Overpass_API.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef DE__OSM3S___TEMPLATE_DB__FILE_BLOCKS_INDEX_H
#define DE__OSM3S___TEMPLATE_DB__FILE_BLOCKS_INDEX_H

#include "types.h"

#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <list>
#include <string>
#include <vector>

/** Declarations: -----------------------------------------------------------*/


// One entry of a block index: the key of a block together with its location
// in the data file.
template< class TIndex >
struct File_Block_Index_Entry
{
  // NOTE(review): these constants are not used in the code visible here;
  // presumably they classify entries for the block iterators. Confirm
  // against the users of this struct.
  static const int EMPTY = 1;
  static const int GROUP = 2;
  static const int SEGMENT = 3;
  static const int LAST_SEGMENT = 4;
  
  File_Block_Index_Entry(const TIndex& index_, uint32 pos_, uint32 size_, uint32 max_keysize_)
    : index(index_), pos(pos_), size(size_), max_keysize(max_keysize_) {}
  
  // Key stored with the block.
  TIndex index;
  // Number of the first block occupied in the data file; checked against the
  // block count when the index is loaded.
  uint32 pos;
  // Number of consecutive blocks occupied, starting at pos.
  uint32 size;
  // Stored in and restored from the index file alongside pos and size.
  // NOTE(review): its meaning is not visible in this file.
  uint32 max_keysize;
};

#include <iostream>

template< class TIndex >
struct File_Blocks_Index : public File_Blocks_Index_Base
{
public:
  File_Blocks_Index(const File_Properties& file_prop,
	      bool writeable, bool use_shadow,
	      const std::string& db_dir, const std::string& file_name_extension);
  virtual ~File_Blocks_Index();
  bool writeable() const { return (empty_index_file_name != ""); }
  const std::string& file_name_extension() const { return file_name_extension_; }
    
  std::string get_data_file_name() const { return data_file_name; }
  uint64 get_block_size() const { return block_size_; }
  uint32 get_compression_factor() const { return compression_factor; }
  uint32 get_compression_method() const { return compression_method; }
    
  std::list< File_Block_Index_Entry< TIndex > >& get_blocks()
  {
    if (index_buf.ptr)
      init_blocks();
    return blocks;
  }
  std::vector< std::pair< uint32, uint32 > >& get_void_blocks()
  {
    if (!void_blocks_initialized)
      init_void_blocks();
    return void_blocks;
  }
  
  uint DEBUG_check_total_index_size()
  {
    uint index_size = 0;
    for (typename std::list< File_Block_Index_Entry< TIndex > >::const_iterator
        it(blocks.begin()); it != blocks.end(); ++it)
      index_size += 12 + it->index.size_of();
    std::cerr<<"DEBUG "<<index_size<<'\n';
    return index_size;
  }
    
  static const int FILE_FORMAT_VERSION = 7512;
  static const int NO_COMPRESSION = 0;
  static const int ZLIB_COMPRESSION = 1;
  static const int LZ4_COMPRESSION = 2;
    
private:
  std::string index_file_name;
  std::string empty_index_file_name;
  std::string data_file_name;
  std::string file_name_extension_;
  Void_Pointer< uint8 > index_buf;
  uint64 file_size;
  uint32 index_size;
  std::list< File_Block_Index_Entry< TIndex > > blocks;
  std::vector< std::pair< uint32, uint32 > > void_blocks;
  bool void_blocks_initialized;
    
  uint64 block_size_;
  uint32 compression_factor;
  int compression_method;
    
  void init_structure_params();
  void init_blocks();
  void init_void_blocks();
  
public:
  uint32 block_count;  
};


template< class TIndex >
std::vector< bool > get_data_index_footprint(const File_Properties& file_prop,
					std::string db_dir);

/** Implementation File_Blocks_Index: ---------------------------------------*/

/**
 * Opens the block index of a data file.
 *
 * Determines the size of the data file, reads the raw index file into
 * memory, takes block size and compression settings from the index header if
 * there is one, and for a writeable index determines the void blocks right
 * away. A missing data file or index file is not an error; the index then
 * starts out empty.
 *
 * @param file_prop            naming scheme and default parameters of the file
 * @param writeable            whether the index is written back on destruction
 * @param use_shadow           whether the shadow copy of the index file is read
 * @param db_dir               database directory; used as a plain prefix for file names
 * @param file_name_extension  extra file name component; ".legacy" selects the old index format
 * @throws File_Error  for every file error other than error number 2, and
 *                     for an unsupported index file format
 */
template< class TIndex >
File_Blocks_Index< TIndex >::File_Blocks_Index
    (const File_Properties& file_prop, bool writeable, bool use_shadow,
     const std::string& db_dir, const std::string& file_name_extension) :
     index_file_name(db_dir + file_prop.get_file_name_trunk()
         + file_name_extension + file_prop.get_data_suffix()
         + file_prop.get_index_suffix()
         + (use_shadow ? file_prop.get_shadow_suffix() : "")),
     empty_index_file_name(writeable ? db_dir + file_prop.get_file_name_trunk()
         + file_name_extension + file_prop.get_data_suffix()
         + file_prop.get_shadow_suffix() : ""),
     data_file_name(db_dir + file_prop.get_file_name_trunk()
         + file_name_extension + file_prop.get_data_suffix()),
     file_name_extension_(file_name_extension),
     index_buf(0), file_size(0), index_size(0),
     void_blocks_initialized(false),
     block_size_(file_prop.get_block_size()), // can be overwritten by index file
     compression_factor(file_prop.get_compression_factor()), // can be overwritten by index file
     compression_method(file_prop.get_compression_method()), // can be overwritten by index file
     block_count(0)
{
  try
  {
    Raw_File val_file(data_file_name, O_RDONLY, S_666, "File_Blocks_Index::File_Blocks_Index::1");
    file_size = val_file.size("File_Blocks_Index::File_Blocks_Index::2");
  }
  catch (const File_Error& e)
  {
    // Error number 2 is tolerated (presumably ENOENT, i.e. the data file
    // does not exist yet). Everything else is passed on unchanged.
    if (e.error_number != 2)
      throw;
  }

  try
  {
    Raw_File source_file(index_file_name, O_RDONLY, S_666,
        "File_Blocks_Index::File_Blocks_Index::3");

    // read index file
    index_size = source_file.size("File_Blocks_Index::File_Blocks_Index::4");
    index_buf.resize(index_size);
    source_file.read(index_buf.ptr, index_size, "File_Blocks_Index::File_Blocks_Index::5");
  }
  catch (const File_Error& e)
  {
    // Same tolerance as above; a missing index file leaves the buffer empty.
    if (e.error_number != 2)
      throw;
    index_buf.resize(0);
  }

  init_structure_params();

  if (empty_index_file_name != "")
    init_void_blocks();
}


/**
 * Derives the structural parameters from the raw index buffer, if there is
 * one. For every format except ".legacy" the 8-byte header is evaluated:
 * bytes 0-3 hold the format version, byte 4 the binary logarithm of the
 * block size, byte 5 the binary logarithm of the compression factor and
 * bytes 6-7 the compression method. Afterwards the number of blocks in the
 * data file is computed from the file size.
 *
 * @throws File_Error if the index file is too short to contain the header
 *                    or carries an unexpected format version
 */
template< class TIndex >
void File_Blocks_Index< TIndex >::init_structure_params()
{
  if (index_buf.ptr)
  {
    if (file_name_extension_ != ".legacy")
    {
      // A truncated index file must not be read beyond its end.
      if (index_size < 8)
        throw File_Error(0, index_file_name, "File_Blocks_Index: index file too short for header");
      if (*(int32*)index_buf.ptr != FILE_FORMAT_VERSION)
        throw File_Error(0, index_file_name, "File_Blocks_Index: Unsupported index file format version");
      block_size_ = 1ull<<*(uint8*)(index_buf.ptr + 4);
      compression_factor = 1u<<*(uint8*)(index_buf.ptr + 5);
      compression_method = *(uint16*)(index_buf.ptr + 6);
    }
    block_count = file_size / block_size_;
  }
}


/**
 * Decodes the raw index buffer into the list of block entries and releases
 * the buffer afterwards. Does nothing if the buffer has already been
 * released.
 *
 * Legacy records consist of the key followed by the block position and the
 * maximum key size; their block size is always 1. Current records start
 * after the 8-byte header and consist of position, size and maximum key
 * size (4 bytes each) followed by the key.
 *
 * @throws File_Error if an entry refers to a position at or beyond the
 *                    number of blocks in the data file; the offending entry
 *                    has already been appended to the list at that point
 */
template< class TIndex >
void File_Blocks_Index< TIndex >::init_blocks()
{
  if (!index_buf.ptr)
    return;

  if (file_name_extension_ == ".legacy")
    // We support this way the old format although it has no version marker.
  {
    uint32 offset = 0;
    while (offset < index_size)
    {
      uint8* const record = index_buf.ptr + offset;

      TIndex key(record);
      File_Block_Index_Entry< TIndex > entry(
          key,
          *(uint32*)(record + TIndex::size_of(record)),
          1, //block size is always 1 in the legacy format
          *(uint32*)(record + (TIndex::size_of(record) + 4)));
      blocks.push_back(entry);
      if (entry.pos >= block_count)
        throw File_Error(0, index_file_name, "File_Blocks_Index: bad pos in index file");

      offset += TIndex::size_of(record) + 8;
    }
  }
  else if (index_size > 0)
  {
    uint32 offset = 8;
    while (offset < index_size)
    {
      uint8* const record = index_buf.ptr + offset;

      TIndex key(record + 12);
      File_Block_Index_Entry< TIndex > entry(
          key,
          *(uint32*)(record),
          *(uint32*)(record + 4),
          *(uint32*)(record + 8));
      blocks.push_back(entry);
      if (entry.pos >= block_count)
        throw File_Error(0, index_file_name, "File_Blocks_Index: bad pos in index file");

      offset += 12;
      offset += TIndex::size_of(record + 12);
    }
  }

  index_buf.resize(0);
}


/**
 * Determines the list of void blocks, i.e. (length, start) pairs of block
 * ranges in the data file that no index entry refers to.
 *
 * For a writeable index the list is read from the void blocks file if that
 * file can be read completely. Otherwise it is computed from the block
 * entries. The resulting list is sorted.
 */
template< class TIndex >
void File_Blocks_Index< TIndex >::init_void_blocks()
{
  if (index_buf.ptr)
    init_blocks();

  std::vector< bool > is_referred(block_count, false);
  for (typename std::list< File_Block_Index_Entry< TIndex > >::const_iterator it = blocks.begin();
      it != blocks.end(); ++it)
  {
    // Only the start position of an entry is validated when the index is
    // loaded. Clamp the end of the range so that an entry reaching beyond
    // the data file cannot write outside of is_referred. The sum is computed
    // in 64 bit to rule out wrap-around.
    uint64 range_end = uint64(it->pos) + it->size;
    if (range_end > block_count)
      range_end = block_count;
    for (uint64 i = it->pos; i < range_end; ++i)
      is_referred[i] = true;
  }

  bool empty_index_file_used = false;
  if (empty_index_file_name != "")
  {
    try
    {
      Raw_File void_blocks_file(empty_index_file_name, O_RDONLY, S_666, "");
      uint32 void_index_size = void_blocks_file.size("File_Blocks_Index::File_Blocks_Index::6");
      Void_Pointer< uint8 > void_index_buf(void_index_size);
      void_blocks_file.read(void_index_buf.ptr, void_index_size,
          "File_Blocks_Index::File_Blocks_Index::7");
      // Each void block takes 8 bytes in the file.
      for (uint32 i = 0; i < void_index_size/8; ++i)
        void_blocks.push_back(*(std::pair< uint32, uint32 >*)(void_index_buf.ptr + 8*i));
      empty_index_file_used = true;
    }
    // Best effort: if the file cannot be read, fall back to computing the
    // void blocks from the block entries below.
    catch (const File_Error&) {}
  }

  if (!empty_index_file_used)
  {
    // determine void_blocks
    uint32 last_start = 0;
    for (uint32 i = 0; i < block_count; ++i)
    {
      if (is_referred[i])
      {
        if (last_start < i)
          void_blocks.push_back(std::make_pair(i - last_start, last_start));
        last_start = i+1;
      }
    }
    if (last_start < block_count)
      void_blocks.push_back(std::make_pair(block_count - last_start, last_start));
  }

  std::stable_sort(void_blocks.begin(), void_blocks.end());
  void_blocks_initialized = true;
}


/**
 * Writes a writeable index back to disk; a read-only index is destroyed
 * without any file access.
 *
 * The index file receives the 8-byte header (format version, binary
 * logarithms of block size and compression factor, compression method)
 * followed by one record per block entry: position, size and maximum key
 * size (4 bytes each) and the key. The void blocks file receives 8 bytes per
 * void block; failures while writing that file are ignored.
 */
template< class TIndex >
File_Blocks_Index< TIndex >::~File_Blocks_Index()
{
  if (empty_index_file_name == "")
    return;

  // Keep space for file version and size information
  uint32 index_size = 8;
  uint32 pos = 8;

  for (typename std::list< File_Block_Index_Entry< TIndex > >::const_iterator
      it(blocks.begin()); it != blocks.end(); ++it)
    index_size += 12 + it->index.size_of();

  Void_Pointer< uint8 > index_buf(index_size);

  *(uint32*)index_buf.ptr = FILE_FORMAT_VERSION;
  *(uint8*)(index_buf.ptr + 4) = shift_log(block_size_);
  *(uint8*)(index_buf.ptr + 5) = shift_log(compression_factor);
  *(uint16*)(index_buf.ptr + 6) = compression_method;

  for (typename std::list< File_Block_Index_Entry< TIndex > >::const_iterator
      it(blocks.begin()); it != blocks.end(); ++it)
  {
    *(uint32*)(index_buf.ptr+pos) = it->pos;
    pos += 4;
    *(uint32*)(index_buf.ptr+pos) = it->size;
    pos += 4;
    *(uint32*)(index_buf.ptr+pos) = it->max_keysize;
    pos += 4;
    it->index.to_data(index_buf.ptr+pos);
    pos += it->index.size_of();
  }

  Raw_File dest_file(index_file_name, O_RDWR|O_CREAT, S_666,
      "File_Blocks_Index::~File_Blocks_Index::1");

  // Shrink the file first if the new index is shorter than the old one.
  if (index_size < dest_file.size("File_Blocks_Index::~File_Blocks_Index::2"))
    dest_file.resize(index_size, "File_Blocks_Index::~File_Blocks_Index::3");
  dest_file.write(index_buf.ptr, index_size, "File_Blocks_Index::~File_Blocks_Index::4");

  // Write void blocks. Every void block is a pair of two uint32 values and
  // therefore takes 8 bytes, which is also what the reader expects.
  const uint32 void_index_size = void_blocks.size() * 8;
  Void_Pointer< uint8 > void_index_buf(void_index_size);
  std::pair< uint32, uint32 >* it_ptr = (std::pair< uint32, uint32 >*)(void_index_buf.ptr);
  for (std::vector< std::pair< uint32, uint32 > >::const_iterator it(void_blocks.begin());
      it != void_blocks.end(); ++it)
    *(it_ptr++) = *it;

  try
  {
    Raw_File void_file(empty_index_file_name, O_RDWR|O_TRUNC, S_666,
        "File_Blocks_Index::~File_Blocks_Index::5");
    // The complete buffer must be written; writing only
    // size()*sizeof(uint32) bytes would drop the second half of the list.
    void_file.write(void_index_buf.ptr, void_index_size,
        "File_Blocks_Index::~File_Blocks_Index::6");
  }
  // Best effort: the void blocks file is opened without O_CREAT, so this
  // fails whenever the file does not exist. The list is recomputed from the
  // block entries on the next start in that case.
  catch (const File_Error&) {}
}

/** Implementation non-members: ---------------------------------------------*/

template< class TIndex >
std::vector< bool > get_data_index_footprint
    (const File_Properties& file_prop, std::string db_dir)
{
  File_Blocks_Index< TIndex > index(file_prop, false, false, db_dir, "");
  
  std::vector< bool > result(index.block_count, true);
  for (typename std::vector< std::pair< uint32, uint32 > >::const_iterator
      it = index.get_void_blocks().begin(); it != index.get_void_blocks().end(); ++it)
  {
    for (uint32 i = 0; i < it->first; ++i)
      result[it->second + i] = false;
  }
  return result;
}

#endif



Archive powered by MHonArc 2.6.19+.

Top of Page