2016-05-01 14 views
2

私は比較的新しいプログラミングC++です。私は子供のノードを格納するツリーデータ構造の実装でunorderd_mapを使用してdbのインデックスのようなツリーを実装しています。私は木のような構造体で働くので、検索メソッドは再帰的です。また、ノードのポインタも格納していますので、メモリの問題がうまく処理されていない可能性があります。セグメンテーション違反が発生しています。次は私のコードとその出力です。再帰関数unordered_map unordered_setを使用したセグメンテーションエラー

#include <memory> 
#include <sstream> 
#include <unordered_map> 
#include <iostream> 
#include <string> 
#include <sqlite3.h> 
#include "aux_functions.cpp" 
#include <math.h> 

using namespace std; 


class TreeLikeIndex 
{ 
    public: 
    TreeLikeIndex(string attribute, string indices, int indices_count, short int is_leaf, unordered_map<string, TreeLikeIndex*> children); 
    TreeLikeIndex(string indices, int indices_count); 
    TreeLikeIndex(); 
    string search(unordered_map<string, string> *); 
    private: 
    string indices; 
    int indices_count; 
    short int is_leaf; 
    string attribute; 
    unordered_map<string, TreeLikeIndex*> children; 

}; 


string TreeLikeIndex::search(unordered_map<string, string> * _tuple) 
{ 
    if((*_tuple).empty() || this->is_leaf) return this->indices; 
    string att_val = (*_tuple)[this->attribute]; 
    (*_tuple).erase(this->attribute); 
    TreeLikeIndex * child_with_that_value = this->children[att_val]; 
    return (*child_with_that_value).search(_tuple); 
} 



class DecisionTreeLikeIndexer 
{ 

public: 
    DecisionTreeLikeIndexer(string, string, string); 
    int rebuild_index(); 
    TreeLikeIndex * get_index(); 

private: 
    TreeLikeIndex * build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count); 
    TreeLikeIndex * index; 
    string source_db_address; 
    string dest_folder_address; 
    time_t time_of_last_build; 
    unordered_set<string> columns_names; 
    string source_table_name; 
    unordered_set<string> temp_tables_names; 
    string id_column_name; 
    sqlite3 * source_db_connection; 
    int table_count; 
}; 

int DecisionTreeLikeIndexer::rebuild_index() 
{ 
    this->index = this->build_index(this->columns_names, 0, "", 0); 
    this->time_of_last_build = time(NULL); 
    return 0; 
} 

TreeLikeIndex * DecisionTreeLikeIndexer::get_index() 
{ 
    return this->index; 
} 

DecisionTreeLikeIndexer::DecisionTreeLikeIndexer(string source_db_address, string table_name, string dest_folder_address) 
{ 
    this->source_db_address = source_db_address; 
    this->dest_folder_address = dest_folder_address; 
    this->columns_names = Aux::get_column_names(source_db_address, table_name); 
    this->source_table_name = table_name; 
    this->id_column_name = "rowid"; 
    this->source_db_connection = Aux::get_db_connection(this->source_db_address); 

    // Getting count of this table 

    sqlite3_stmt* statement; 
    string query = "SELECT count(*) FROM " + this->source_table_name + ";"; 
    if(sqlite3_prepare(this->source_db_connection, query.c_str(), -1, &statement, 0) == SQLITE_OK) 
    { 
    int res = sqlite3_step(statement); 
    const unsigned char * count_char = sqlite3_column_text(statement,0); 
    if(res == SQLITE_ROW) 
    { 
     stringstream _temp; 
     _temp << count_char; 
     _temp >> this->table_count;  
    } 
    sqlite3_finalize(statement);  
    } 
    else 
    { 
    cout << "Error initializating Indexer (Getting initial table count): " << sqlite3_errmsg(this->source_db_connection) << endl; 
    } 

} 

TreeLikeIndex * DecisionTreeLikeIndexer::build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count) 
{ 

    if(attributes_list.size() <=1 || (depth > 0 && ids_list_count <= 1)) 
    { 
    Aux::tabs(depth); 
    cout << "Leaf at depth: " << depth << " Ids are: " << comma_separated_ids << " Ids count: " << ids_list_count << endl; 
    static TreeLikeIndex * node = new TreeLikeIndex((string)comma_separated_ids, (int)ids_list_count); 
    return node; 
    } 

    string source_table = this->source_table_name; 
    int count = this->table_count; 

    if(depth > 0) 
    { 
    while(1) 
    { 
    source_table = *Aux::get_random_list_of_strings(1).begin(); 
    if(this->temp_tables_names.insert(source_table).second) break; 
    } 

    const string create_temp_table_stmnt = "CREATE TEMP TABLE " + source_table + " AS SELECT * FROM " + this->source_table_name + " WHERE " + this->id_column_name + " IN(" + comma_separated_ids + ")"; 
    sqlite3_exec(this->source_db_connection, create_temp_table_stmnt.c_str(),Aux::sqlt_callback,0,NULL); 
    count = ids_list_count; 
    Aux::tabs(depth); 
    cout << "Not root node" << endl; 
    } 

    Aux::tabs(depth); 
    cout << "Source table is: " << source_table << " Table count is: " << count << endl; 
    Aux::tabs(depth); 
    cout << "Attributes list is: "; for_each(attributes_list.begin(), attributes_list.end(),[](string v){cout << v << " ";}); 
    cout << endl; 
    const double E = log2(count) ; 
    Aux::tabs(depth); 
    cout << "Entropy of node: " << E << endl; 
    string best_attribute; 
    double best_gain; 
    unordered_set<string> best_attribute_values; 

    for(string attr: attributes_list) 
    { 
    Aux::tabs(depth+1); 
cout << "Analysing attribute: " << attr << endl; 
const string get_at_count_values_query = "SELECT " + attr + ", count(" + attr + ") FROM " + source_table + " GROUP BY " + attr + ";"; 
    sqlite3_stmt * stmnt; 
    double weighted_entropy = 0; 
    unordered_set<string> this_att_values; 
    if(sqlite3_prepare(this->source_db_connection, get_at_count_values_query.c_str(), -1, &stmnt, 0) == SQLITE_OK) 
    { 
     for(;;) 
     { 


     int res = sqlite3_step(stmnt); 

     if(res == SQLITE_DONE || res==SQLITE_ERROR) 
     { 
     double gAti = E - weighted_entropy; 
     Aux::tabs(depth+1); 
     cout << "Finish computing WE for att: " << attr << " Gain is: " << gAti << endl; 
     if(gAti > best_gain) 
     { 
      Aux::tabs(depth+1); 
      cout << "Found attribute with better gain." << endl; 
      best_gain = gAti; 
      best_attribute = attr; 
      best_attribute_values.clear(); 

     Aux::tabs(depth+1); 
     for(string v:this_att_values) 
      { 
      best_attribute_values.insert(v); 
      } 
      cout << endl; 

      this_att_values.clear(); 
     } 
     sqlite3_finalize(stmnt); 
     //delete &res; 
     break; 
     } 
     if(res == SQLITE_ROW) 
     { 

     string val = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     int vSize = sqlite3_column_int(stmnt,1); 
     Aux::tabs(depth+2); 
     this_att_values.insert(val); 
     double ratio = double(vSize)/double(count); 
     weighted_entropy += double(ratio) * double(log2(vSize)); 
     Aux::tabs(depth+2); 
     cout << "Processing value: " << val << " With vSize: " << vSize << " Current WE is: " << weighted_entropy << endl; 
     } 
    } 
    } 
} 

    Aux::tabs(depth); 
    cout << "Finish processing attributes list. Best attribute is: " <<  best_attribute << " Best gain is: " << best_gain << endl; 
    Aux::tabs(depth); 
    cout << "Best attribute values are: ";  for_each(best_attribute_values.begin(), best_attribute_values.end(), [](string v){cout << v << ",";}); cout << endl; 
    unordered_map<string, TreeLikeIndex *> children; 
    for(string val: best_attribute_values) 
    { 

    const string get_ids_of_bestatt_val = "SELECT rowid FROM " + source_table + " WHERE " + best_attribute + " = " + val + ";"; 
    int ids_count = 0; 
    sqlite3_stmt * stmnt; 
    string ids = ""; 
    bool first = 1; 
    int next_depth = depth + 1; 
    unordered_set<string> next_attributes_set; 

    for(string attr: attributes_list) if(attr != best_attribute) next_attributes_set.insert(attr); 

    if(sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK) 
    { 
     for(;;) 
     { 
     int res = sqlite3_step(stmnt); 

     if(res == SQLITE_ROW) 
      { 
      string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
      if(!first) ids += "," + id; 
      else ids += id; 
      ids_count++; 

      } 

     if(res == SQLITE_DONE || res == SQLITE_ERROR) 
      { 
      Aux::tabs(depth+1); 
      cout << "Adding branch for val: " << val << endl; 
      Aux::tabs(depth+1); 
      cout << " Next attributes are: ";  for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
      cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
      sqlite3_finalize(stmnt); 
      static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
      pair<string, TreeLikeIndex*> child (val, temp_child); 
     children.insert(child); 
      } 
     } 
    } 
    } 

    Aux::tabs(depth); 
    cout << "Finish processing node, will return." << endl; 
    static TreeLikeIndex * no_leaf_node = new TreeLikeIndex(best_attribute, "all", count, 0, children); 
    return no_leaf_node; 

} 
} 

TreeLikeIndex::TreeLikeIndex(std::string attribute, std::string indices, int indices_count, short int is_leaf, unordered_map<std::string, TreeLikeIndex*> children) 
{ 
    this->attribute = attribute; 
    this->indices = indices; 
    this->is_leaf = is_leaf; 
    this->children = children; 
    this->children.clear(); 
    for(pair<string, TreeLikeIndex*> p: children) this->children.insert(p); 
    this->indices_count = indices_count; 
} 

TreeLikeIndex::TreeLikeIndex(string indices, int indices_count) 
{ 
    this->indices = indices; 
    this->indices_count = indices_count; 
    this->is_leaf = 1; 
} 

TreeLikeIndex::TreeLikeIndex() 
{ 
    this->indices = ""; 
    this->indices_count = 0; 
    this->is_leaf = 1; 
} 




int main() 
{ 

string source_db_address = "my_table"; 
string table_name = "b"; 
string dest_folder_address = "."; 

DecisionTreeLikeIndexer indexer(source_db_address, table_name, dest_folder_address); 
indexer.rebuild_index(); 
} 

、出力は次のとおりです。私は

をSHUREけど....ないよ

Source table is: b Table count is: 9 
Attributes list is: cant_n_dec cant_n_des cant_n_control 
Entropy of node: 3.16993 
    Analysing attribute: cant_n_dec 
        Processing value: 1 With vSize: 1 Current WE is: 0 
        Processing value: 2 With vSize: 4 Current WE is: 0.888889 
        Processing value: 3 With vSize: 2 Current WE is: 1.11111 
        Processing value: 4 With vSize: 1 Current WE is: 1.11111 
        Processing value: 5 With vSize: 1 Current WE is: 1.11111 
    Finish computing WE for att: cant_n_dec Gain is: 2.05881 
    Found attribute with better gain. 

    Analysing attribute: cant_n_des 
        Processing value: 1 With vSize: 2 Current WE is: 0.222222 
        Processing value: 2 With vSize: 4 Current WE is: 1.11111 
        Processing value: 3 With vSize: 2 Current WE is: 1.33333 
        Processing value: 5 With vSize: 1 Current WE is: 1.33333 
    Finish computing WE for att: cant_n_des Gain is: 1.83659 
    Analysing attribute: cant_n_control 
        Processing value: 1 With vSize: 2 Current WE is: 0.222222 
        Processing value: 2 With vSize: 3 Current WE is: 0.750543 
        Processing value: 3 With vSize: 3 Current WE is: 1.27886 
        Processing value: 5 With vSize: 1 Current WE is: 1.27886 
    Finish computing WE for att: cant_n_control Gain is: 1.89106 
Finish processing attributes list. Best attribute is: cant_n_dec Best gain is: 2.05881 
Best attribute values are: 1,2,3,4,5, 
    Adding branch for val: 1 
    Next attributes are: cant_n_control,cant_n_des, Depth is: 1 Ids are: 3 Ids count: 1 
    Leaf at depth: 1 Ids are: 3 Ids count: 1 
Segmentation fault 
+0

デバッガに基づいてセグメンテーションフォルトを引き起こした行をデバッガで1行ずつステップアップしたときに、どのような行が表示されましたか? –

+0

私は印刷技術(cout <<)をデバッグ技術として使用しています。プログラムの実行は、最初の(初期の)1ステップから再帰的コンテキストに入り、ポインタをリーフノードに返します。このポインタは、挿入を使用して最初の(初期)コンテキストでunordered_mapに格納されます。次のようなものです:T * obj = recursive_call(); some_pair( "key"、obj); uMap.insert(some_pair);それはクラッシュするところです。 –

+0

余分なprintステートメントを置くことは、もっとも簡単なバグではありません。デバッガの使い方を学ぶことは、すべてのC++開発者にとって必要不可欠なスキルです。デバッガから得られる情報は、基本的な印刷ステートメントよりも無限に役立ちます。このプログラムを別にして、デバッガの使用方法を学んでください。 –

答えて

0

私はこの問題は、次のサイクル

for(;;) 
    { 
    int res = sqlite3_step(stmnt); 

    if(res == SQLITE_ROW) 
     { 
     string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     if(!first) ids += "," + id; 
     else ids += id; 
     ids_count++; 

     } 

    if(res == SQLITE_DONE || res == SQLITE_ERROR) 
     { 
     Aux::tabs(depth+1); 
     cout << "Adding branch for val: " << val << endl; 
     Aux::tabs(depth+1); 
     cout << " Next attributes are: ";  for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
     cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
     sqlite3_finalize(stmnt); 
     static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
     pair<string, TreeLikeIndex*> child (val, temp_child); 
    children.insert(child); 
     } 
    } 

であることができると思います私は終了時に理解していません(for(;;)の出口条件なし、returnとなしブロック内の)。

そして

int res = sqlite3_step(stmnt); 

、(

sqlite3_finalize(stmnt); 

への呼び出しwhith)SQLITE_DONEまたはSQLITE_ERROR場合後、サイクルが繰り返されるとき、私はチェセグメンテーションフォールトが次の命令によって引き起こされていると思われます再びstmntが無効です。

以下は解決策ですか?

if (sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK) 
{ 
    while (sqlite3_step(stmnt) == SQLITE_ROW) 
    { 
     ids += (first ? "" : ",") 
     + std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     ids_count++; 
    } 

    Aux::tabs(depth+1); 
    cout << "Adding branch for val: " << val << endl; 
    Aux::tabs(depth+1); 
    cout << " Next attributes are: "; 
    for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
    cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
    sqlite3_finalize(stmnt); 
    static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
    pair<string, TreeLikeIndex*> child (val, temp_child); 
    children.insert(child); 
} 
+0

今、それは動作します、ありがとう、私はそれを見ていないか分からない。 –