Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
// $Id: bench_static.cc 1879 2015-01-29 12:03:37Z ynaga $
// Copyright (c) 2013-2015 Naoki Yoshinaga
#ifdef __APPLE__
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef USE_PREFIX_TRIE
#include
#else
#include
#endif
#if defined (USE_DARTS_CLONE)
#include
#elif defined (USE_DARTS_CLONE_OLD)
#include
#else
#include
#endif
#include
#include
#include
// typedef
#if defined (USE_CEDAR_UNORDERED)
typedef cedar::da cedar_t;
#else
typedef cedar::da cedar_t;
#endif
typedef Darts::DoubleArray darts_t;
typedef tx_tool::tx tx_t;
typedef ux::Trie ux_t;
typedef marisa::Trie marisa_t;
size_t get_process_size () {
#ifdef __APPLE__
struct task_basic_info t_info;
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
task_info (current_task (), TASK_BASIC_INFO,
reinterpret_cast (&t_info), &t_info_count);
return t_info.resident_size;
#else
FILE* fp = std::fopen("/proc/self/statm", "r");
size_t dummy (0), vm (0);
std::fscanf (fp, "%ld %ld ", &dummy, &vm); // get resident (see procfs)
std::fclose (fp);
return vm * ::getpagesize ();
#endif
}
size_t read_data (const char* file, char*& data) {
int fd = ::open (file, O_RDONLY);
if (fd < 0)
{ std::fprintf (stderr, "no such file: %s\n", file); std::exit (1); }
size_t size = static_cast (::lseek (fd, 0L, SEEK_END));
data = new char[size];
::lseek (fd, 0L, SEEK_SET);
::read (fd, data, size);
::close (fd);
return size;
}
#ifdef USE_BINARY_DATA
#define KEY_SEP '\0'
inline char* find_sep (char* p) { while (*p != '\0') ++p; return p; }
#else
#define KEY_SEP '\n'
inline char* find_sep (char* p) { while (*p != '\n') ++p; *p = '\0'; return p; }
#endif
template
inline T* create () { return new T (); }
template
inline void destroy (T* t) { delete t; }
// darts/darts-clone
template
void build (T* t, char* data, size_t size, int& n, const char* index) {
std::vector key;
std::vector len;
std::vector val;
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
key.push_back (::strdup (start));
len.push_back (end - start);
val.push_back (++n);
}
t->build (key.size (), &key[0], &len[0], &val[0]);
t->save (index);
for (std::vector ::iterator it = key.begin ();
it != key.end (); ++it)
::free (const_cast (*it));
}
// cedar
template <>
void build (cedar_t* t, char* data, size_t size, int& n, const char* index) {
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
t->update (start, end - start) = ++n;
}
t->save (index);
}
// tx
template <>
void build (tx_t* t, char* data, size_t size, int& n, const char* index) {
std::vector str;
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
str.push_back (start);
++n;
}
t->build (str, index);
}
// ux
template <>
void build (ux_t* t, char* data, size_t size, int& n, const char* index) {
std::vector str;
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
str.push_back (start);
++n;
}
t->build (str);
t->save (index);
}
// marisa
template <>
void build (marisa_t* t, char* data, size_t size, int& n, const char* index) {
marisa::Keyset keyset;
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
keyset.push_back (start);
++n;
}
t->build (keyset);
t->save (index);
}
// cedar/darts/darts-clone
template
inline bool lookup_key (const T& t, const char* key, size_t len)
{ return t.template exactMatchSearch (key, len) >= 0; }
// tx
template <>
inline bool lookup_key (const tx_t& t, const char* key, size_t len)
{ size_t n = 0; return t.prefixSearch (key, len, n) != tx_tool::tx::NOTFOUND && n == len; }
// ux
template <>
inline bool lookup_key (const ux_t& t, const char* key, size_t len)
{ size_t n = 0; return t.prefixSearch (key, len, n) != ux::NOTFOUND && n == len; }
// marisa
template <>
inline bool lookup_key (const marisa_t& t, const char* key, size_t len)
{ static marisa::Agent agent; agent.set_query (key, len); return t.lookup (agent); }
// lookup
template
void lookup (const T& t, char* data, size_t size, int& n_, int& n) {
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
if (lookup_key (t, start, end - start))
++n_;
++n;
}
}
// cedar/darts/darts-clone
template
void read_trie (T* t, const char* index) { t->open (index); }
// tx
template <>
void read_trie (tx_t* t, const char* index) { t->read (index); }
// ux
template <>
void read_trie (ux_t* t, const char* index) { t->load (index); }
// marisa-trie
template <>
void read_trie (marisa_t* t, const char* index) { t->load (index); }
size_t get_size (const char* index) {
int fd = ::open (index, O_RDONLY);
if (fd < 0)
{ std::fprintf (stderr, "no such file: %s\n", index); std::exit (1); }
size_t size = static_cast (::lseek (fd, 0L, SEEK_END));
::close (fd);
return size;
}
template
void bench (const char* keys, const char* index, const char* queries, const char* label) {
std::fprintf (stderr, "---- %-25s --------------------------\n", label);
//
T* t = create ();
struct timeval st, et;
if (std::strcmp (keys, "-") != 0) {
// build trie
char* data = 0;
const size_t size = read_data (keys, data);
size_t rss = get_process_size ();
std::fprintf (stderr, "%-20s %.2f MiB (%ld bytes)\n",
"Init RSS:", rss / 1048576.0, rss);
int n = 0;
::gettimeofday (&st, NULL);
build (t, data, size, n, index);
::gettimeofday (&et, NULL);
double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
"Time to insert:", elapsed, elapsed * 1e9 / n);
std::fprintf (stderr, "%-20s %d\n", "Words:", n);
// trie size
rss = get_size (index);
std::fprintf (stderr, "%-20s %.2f MiB (%ld bytes)\n",
"Trie size:", rss / 1048576.0, rss);
delete [] data;
} else if (std::strcmp (queries, "-") != 0) {
// load data
char* data = 0;
const size_t size = read_data (queries, data);
// search
read_trie (t, index);
int n (0), n_ (0);
::gettimeofday (&st, NULL);
lookup (*t, data, size, n_, n);
::gettimeofday (&et, NULL);
double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
"Time to search:", elapsed, elapsed * 1e9 / n);
std::fprintf (stderr, "%-20s %d\n", "Words:", n);
std::fprintf (stderr, "%-20s %d\n", "Found:", n_);
delete [] data;
}
destroy (t);
}
int main (int argc, char** argv) {
if (argc < 4)
{ std::fprintf (stderr, "Usage: %s keys index queries\n", argv[0]); std::exit (1); }
//
#ifdef USE_CEDAR
#if defined (USE_PREFIX_TRIE)
bench (argv[1], argv[2], argv[3], "cedar (prefix)");
#elif defined (USE_REDUCED_TRIE)
bench (argv[1], argv[2], argv[3], "cedar (reduced)");
#else
bench (argv[1], argv[2], argv[3], "cedar");
#endif
#endif
#ifdef USE_CEDAR_UNORDERED
#if defined (USE_PREFIX_TRIE)
bench (argv[1], argv[2], argv[3], "cedar unordered (prefix)");
#elif defined (USE_REDUCED_TRIE)
bench (argv[1], argv[2], argv[3], "cedar unordered (reduced)");
#else
bench (argv[1], argv[2], argv[3], "cedar unordered");
#endif
#endif
#ifdef USE_DARTS
bench (argv[1], argv[2], argv[3], "darts");
#endif
#ifdef USE_DARTS_CLONE
bench (argv[1], argv[2], argv[3], "darts-clone");
#endif
#ifdef USE_DARTS_CLONE_OLD
bench (argv[1], argv[2], argv[3], "darts-clone_old");
#endif
#ifdef USE_TX
bench (argv[1], argv[2], argv[3], "tx");
#endif
#ifdef USE_UX
bench (argv[1], argv[2], argv[3], "ux");
#endif
#ifdef USE_MARISA
bench (argv[1], argv[2], argv[3], "marisa-trie");
#endif
}