Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
// $Id: bench.cc 1913 2016-01-19 12:55:27Z ynaga $
// Copyright (c) 2013-2015 Naoki Yoshinaga
#ifdef __APPLE__
#include
#endif
#include
#include
#include
#include
#include
#include // for ternary search tree
#include
#include
#include
// C++ containers
#include
// cedar variants
#ifdef USE_PREFIX_TRIE
#include
#else
#include
#endif
// Google's containers and variants
#include
#include
// Cache-concious containers
#include
#ifdef USE_HAT
#include
#endif
#ifdef USE_AHASH
#include
#endif
#ifdef USE_HHASH
#include
#endif
#include
struct str_hash {
size_t operator () (const char* key) const
{ return CityHash64 (key, std::strlen (key)); }
size_t operator () (const char* key, size_t key_size) const
{ return CityHash64 (key, key_size); }
};
struct str_equal {
bool operator () (const char* p, const char* q) const {
// return std::strcmp (p, q) == 0; // slow
for (; *p || *q; ++q, ++p)
if (*p != *q)
return false; // reject immediately
return true;
}
};
// typedef
#if defined (USE_CEDAR_UNORDERED)
typedef cedar::da cedar_t;
#else
typedef cedar::da cedar_t;
#endif
typedef std::unordered_map hash_t;
typedef google::dense_hash_map gdhash_t;
typedef spp::sparse_hash_map shash_t;
typedef Pvoid_t judy_t;
#ifdef USE_AHASH
typedef tsl::array_map ahash_t;
#endif
#ifdef USE_HAT
typedef tsl::htrie_map hat_t;
#endif
#ifdef USE_HHASH
typedef tsl::hopscotch_map hhash_t;
#endif
size_t get_process_size () {
#ifdef __APPLE__
struct task_basic_info t_info;
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
task_info (current_task (), TASK_BASIC_INFO,
reinterpret_cast (&t_info), &t_info_count);
return t_info.resident_size;
#else
FILE* fp = std::fopen("/proc/self/statm", "r");
size_t dummy (0), vm (0);
std::fscanf (fp, "%ld %ld ", &dummy, &vm); // get resident (see procfs)
std::fclose (fp);
return vm * ::getpagesize ();
#endif
}
size_t read_data (const char* file, char*& data) {
int fd = ::open (file, O_RDONLY);
if (fd < 0)
{ std::fprintf (stderr, "no such file: %s\n", file); std::exit (1); }
size_t size = static_cast (::lseek (fd, 0L, SEEK_END));
data = new char[size];
::lseek (fd, 0L, SEEK_SET);
::read (fd, data, size);
::close (fd);
return size;
}
#ifdef USE_BINARY_DATA
#define KEY_SEP '\0'
inline char* find_sep (char* p) { while (*p != '\0') ++p; return p; }
#else
#define KEY_SEP '\n'
inline char* find_sep (char* p) { while (*p != '\n') ++p; *p = '\0'; return p; }
#endif
template
inline T* create () { return new T (); }
template <>
inline gdhash_t* create () { gdhash_t* p = new gdhash_t; p->set_empty_key (""); return p; }
template
inline void destroy (T* t) {
for (typename T::const_iterator it = t->begin (); it != t->end (); ++it)
delete [] it->first;
delete t;
}
template <>
inline void destroy (cedar_t* t) { delete t; }
#ifdef USE_HAT
template <>
inline void destroy (hat_t* t) { delete t; }
#endif
#ifdef USE_AHASH
template <>
inline void destroy (ahash_t* t) { delete t; }
#endif
template <>
inline void destroy (judy_t* t) { Word_t bytes = 0; JSLFA (bytes, *t); delete t; }
// avoid to new temporary objects regarding std::string
template // hash
inline void insert_key (T* t, const char* key, size_t len, int n) {
t->insert ({::strdup (key), n});
}
template // hash
inline bool lookup_key (const T& t, const char* key, size_t len)
{ return t.find (key) != t.end (); }
// hat-trie
#ifdef USE_HAT
template <>
inline void insert_key (hat_t* t, const char* key, const size_t len, int n)
{ t->insert_ks (key, len, n); }
template <>
inline bool lookup_key (const hat_t& t, const char* key, const size_t len)
{ return t.find_ks (key, len) != t.end (); }
#endif
// array-hash
#ifdef USE_AHASH
template <>
inline void insert_key (ahash_t* t, const char* key, const size_t len, int n)
{ t->insert_ks (key, len, n); }
template <>
inline bool lookup_key (const ahash_t& t, const char* key, const size_t len)
{ return t.find_ks (key, len) != t.end (); }
#endif
// cedar
template <>
inline void insert_key (cedar_t* t, const char* key, const size_t len, int n)
{ t->update (key, len) = n; }
template <>
inline bool lookup_key (const cedar_t& t, const char* key, const size_t len)
{ return t.exactMatchSearch (key, len) >= 0; }
// judy array
template <>
inline void insert_key (judy_t* t, const char* key, const size_t len, int n)
{ Word_t* PValue = 0; JSLI (PValue, *t, key); *PValue = n; }
template <>
inline bool lookup_key (const judy_t& t, const char* key, size_t len)
{ PWord_t PValue = 0; JSLG (PValue, t, key); return PValue; }
template
void insert (T* t, char* data, size_t size, int& n) {
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
insert_key (t, start, end - start, ++n);
}
}
// lookup
template
void lookup (const T& t, char* data, size_t size, int& n_, int& n) {
for (char* start (data), *end (data), *tail (data + size);
end != tail; start = ++end) {
end = find_sep (end);
if (lookup_key (t, start, end - start))
++n_;
++n;
}
}
template
void bench (const char* keys, const char* queries, const char* label) {
std::fprintf (stderr, "---- %-25s --------------------------\n", label);
//
T* t = create ();
struct timeval st, et;
{
// build trie
char* data = 0;
const size_t size = read_data (keys, data);
size_t rss = get_process_size ();
std::fprintf (stderr, "%-20s %.2f MiB (%ld bytes)\n",
"Init RSS:", rss / 1048576.0, rss);
int n = 0;
::gettimeofday (&st, NULL);
insert (t, data, size, n);
::gettimeofday (&et, NULL);
double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
"Time to insert:", elapsed, elapsed * 1e9 / n);
std::fprintf (stderr, "%-20s %d\n", "Words:", n);
delete [] data;
}
if (std::strcmp (queries, "-") != 0) {
// load data
char* data = 0;
const size_t size = read_data (queries, data);
// search
int n (0), n_ (0);
::gettimeofday (&st, NULL);
lookup (*t, data, size, n_, n);
::gettimeofday (&et, NULL);
double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
"Time to search:", elapsed, elapsed * 1e9 / n);
std::fprintf (stderr, "%-20s %d\n", "Words:", n);
std::fprintf (stderr, "%-20s %d\n", "Found:", n_);
delete [] data;
}
destroy (t);
}
int main (int argc, char** argv) {
if (argc < 3)
{ std::fprintf (stderr, "Usage: %s keys queries\n", argv[0]); std::exit (1); }
//
#ifdef USE_CEDAR
#if defined (USE_PREFIX_TRIE)
bench (argv[1], argv[2], "cedar (prefix)");
#elif defined (USE_REDUCED_TRIE)
bench (argv[1], argv[2], "cedar (reduced)");
#else
bench (argv[1], argv[2], "cedar");
#endif
#endif
#ifdef USE_CEDAR_UNORDERED
#if defined (USE_PREFIX_TRIE)
bench (argv[1], argv[2], "cedar unordered (prefix)");
#elif defined (USE_REDUCED_TRIE)
bench (argv[1], argv[2], "cedar unordered (reduced)");
#else
bench (argv[1], argv[2], "cedar unordered");
#endif
#endif
#ifdef USE_HASH
bench (argv[1], argv[2], "hash");
#endif
#ifdef USE_GOOGLE_DHASH
bench (argv[1], argv[2], "gdhash");
#endif
#ifdef USE_SHASH
bench (argv[1], argv[2], "shash");
#endif
#ifdef USE_SPLAY
bench (argv[1], argv[2], "splay");
#endif
#ifdef USE_JUDY
bench (argv[1], argv[2], "judy");
#endif
#ifdef USE_HAT
bench (argv[1], argv[2], "hat");
#endif
#ifdef USE_AHASH
bench (argv[1], argv[2], "ahash");
#endif
#ifdef USE_HHASH
bench (argv[1], argv[2], "hhash");
#endif
}