-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwrite_key_id.cpp
More file actions
108 lines (98 loc) · 2.74 KB
/
Copy pathwrite_key_id.cpp
File metadata and controls
108 lines (98 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
* =====================================================================================
*
* Filename: write_key_id.cpp
*
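* Description: Reads tab-separated records (hash id, key, int32 ids...) from
*              stdin, groups consecutive records that share a hash id, and
*              writes each group into a hash dictionary file.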
*
* Version: 1.0
* Created: 2012/03/15 18:03:44
* Revision: none
* Compiler: gcc
*
* Author: YOUR NAME (),
* Company:
*
* =====================================================================================
*/
#include <stdio.h>
#include "bobhash.h"
#include "hash_dict.h"
/*
 * Split a NUL-terminated string on tab characters.
 *
 * Every field found is appended to `result`; entries already in `result`
 * are left untouched. Empty fields between two tabs (or before a leading
 * tab) are kept, but an empty field after the final tab is not emitted.
 * No other character is treated specially, so a trailing newline stays
 * part of the last field.
 *
 * Returns the number of fields appended by this call.
 */
int split(char const *line, std::vector<std::string> & result)
{
    const std::vector<std::string>::size_type initial_size = result.size();

    char const *field_begin = line;
    char const *cursor = line;
    for (; *cursor != '\0'; ++cursor) {
        if (*cursor != '\t') {
            continue;
        }
        // A tab closes the current field, even when that field is empty.
        result.push_back(std::string(field_begin, cursor));
        field_begin = cursor + 1;
    }

    // Emit whatever follows the last tab, unless nothing follows it.
    if (field_begin < cursor) {
        result.push_back(std::string(field_begin, cursor));
    }

    return static_cast<int>(result.size() - initial_size);
}
/*
 * Entry point: load tab-separated records from stdin into a hash dictionary.
 *
 * Usage: <prog> file index_num [index_expand] [align]
 *   file, index_num, index_expand and align are passed straight through to
 *   CreateHashDict::create(); index_expand and align default to 0.
 *
 * Each input line must hold at least three tab-separated fields:
 *   field 0      hash id (parsed with atoll)
 *   field 1      key
 *   field 2..n   integer ids (parsed with atoi)
 * Lines with fewer fields are reported on stderr and skipped.
 *
 * The ids of a record are stored as the raw in-memory bytes of an int32_t
 * array. Consecutive records with the same hash id are collected into one
 * KVGroup, which is handed to write_data() when the hash id changes and once
 * more at end of input.
 * NOTE(review): this only merges *adjacent* records, so the input is
 * presumably sorted or grouped by hash id -- confirm with the producer.
 *
 * Returns 1 when too few arguments are given, 0 otherwise.
 */
int main(int argc, char **argv)
{
    char line[10240];
    if(argc < 3) {
        fprintf(stderr, "usage:%s file index_num index_expand=0, align=0\n", argv[0]);
        return 1;
    }
    char const * file=argv[1];
    int index_num=atoi(argv[2]);
    int index_expand = 0;
    int align = 0;
    if (argc > 3) {
        index_expand = atoi(argv[3]);
    }
    if (argc > 4) {
        align = atoi(argv[4]);
    }
    CreateHashDict<uint64_t> dict;
    // NOTE(review): the result of create() (if it reports one) is not
    // checked here -- confirm how failures are signalled.
    dict.create(file, index_num, index_expand, align);
    // Starts at 0; harmless for a first record with hash id 0 because the
    // group is still empty at that point.
    long long int pre_hash_id=0;
    std::vector<std::string> words;
    std::vector<int32_t> ids;   // reused across records to avoid reallocating
    std::string record;         // one complete input line, of any length
    KVGroup group;
    for (;;) {
        // Assemble one logical line. fgets() returns at most
        // sizeof(line)-1 characters, so a longer line arrives in several
        // chunks; parsing each chunk as its own record would yield bogus
        // hash ids and keys, so the chunks are joined first.
        record.clear();
        bool got_chunk = false;
        while (fgets(line, sizeof(line), stdin)) {
            got_chunk = true;
            const size_t size_before = record.size();
            record += line;
            const size_t chunk_len = record.size() - size_before;
            // The line is complete when fgets() stopped before filling the
            // buffer or when the chunk ends with the newline.
            if (chunk_len + 1 < sizeof(line) || record[record.size() - 1] == '\n') {
                break;
            }
        }
        if (!got_chunk) {
            break;  // end of input (or read error)
        }

        words.clear();
        split(record.c_str(), words);
        if (words.size() < 3) {
            // words.size() is a size_t, so it must be printed with %zu.
            fprintf(stderr, "error too less [%zu]fields:%s\n", words.size(), record.c_str());
            continue;
        }
        long long int hash_id = atoll(words[0].c_str());
        // The last field still carries the trailing newline; atoi() stops
        // at the first non-digit, so it does not affect the parsed value.
        ids.clear();
        ids.reserve(words.size() - 2);
        for(size_t i=2; i<words.size(); ++i) {
            int32_t id = atoi(words[i].c_str());
            //fprintf(stderr, "ids:%s, [%d]", words[i].c_str(), id);
            ids.push_back(id);
        }
        KV kv;
        kv.key = words[1];
        // ids holds at least one element here (words.size() >= 3), so
        // &ids[0] is valid. The value is the array's raw bytes in host
        // byte order.
        char const *id_bytes = reinterpret_cast<char const *>(&ids[0]);
        kv.value = std::string(id_bytes, id_bytes + ids.size()*sizeof(int32_t));
        if (pre_hash_id == hash_id) {
            // Same hash id as the previous record: extend the open group.
            group.group.push_back(kv);
            continue;
        }
        // Hash id changed: flush the finished group before starting a new one.
        if (!group.group.empty()) {
            dict.write_data(pre_hash_id, group);
            group.group.clear();
            //fprintf(stderr, "process hash_id:%lld\n", pre_hash_id);
        }
        pre_hash_id = hash_id;
        group.group.push_back(kv);
    }
    // Flush the group that was still open when input ended.
    if (!group.group.empty()) {
        dict.write_data(pre_hash_id, group);
        group.group.clear();
    }
    return 0;
}