From 9d1d65b08c4a4c33923935a5414d6e6f4fe59ceb Mon Sep 17 00:00:00 2001 From: ipknHama Date: Sat, 2 Aug 2014 06:30:36 +0900 Subject: implementing mustache specs except partial and lambdas --- json.h | 189 ++++++++++++++++++---------- mustache.h | 342 +++++++++++++++++++++++++++++++++++++++----------- template_test/test.py | 19 ++- unittest.cpp | 33 ++++- 4 files changed, 429 insertions(+), 154 deletions(-) diff --git a/json.h b/json.h index bb67d72..3d22b1e 100644 --- a/json.h +++ b/json.h @@ -11,18 +11,13 @@ #include #include -#ifdef __GNUG__ -#define crow_json_likely(x) __builtin_expect(x, 1) -#define crow_json_unlikely(x) __builtin_expect(x, 0) -#else -#ifdef __clang__ +#if defined(__GNUG__) || defined(__clang__) #define crow_json_likely(x) __builtin_expect(x, 1) #define crow_json_unlikely(x) __builtin_expect(x, 0) #else #define crow_json_likely(x) x #define crow_json_unlikely(x) x #endif -#endif namespace crow @@ -39,12 +34,6 @@ namespace crow // TODO return str; } - std::string unescape(const std::string& str) - { - // TODO - return str; - } - enum class type : char { @@ -70,8 +59,8 @@ namespace crow boost::equality_comparable { r_string() {}; - r_string(const char* s, uint32_t length, uint8_t has_escaping) - : s_(s), length_(length), has_escaping_(has_escaping) + r_string(char* s, char* e) + : s_(s), e_(e) {}; ~r_string() { @@ -92,8 +81,7 @@ namespace crow r_string& operator = (r_string&& r) { s_ = r.s_; - length_ = r.length_; - has_escaping_ = r.has_escaping_; + e_ = r.e_; owned_ = r.owned_; return *this; } @@ -101,32 +89,26 @@ namespace crow r_string& operator = (const r_string& r) { s_ = r.s_; - length_ = r.length_; - has_escaping_ = r.has_escaping_; + e_ = r.e_; owned_ = 0; return *this; } operator std::string () const { - return unescape(); + return std::string(s_, e_); } - std::string unescape() const - { - // TODO - return std::string(begin(), end()); - } const char* begin() const { return s_; } - const char* end() const { return s_+length_; } + const char* end() const { return e_; } + size_t size() const { return end() - begin(); } using iterator = const char*; using const_iterator = const char*; - const char* s_; - uint32_t length_; - uint8_t has_escaping_; + char* s_; + mutable char* e_; uint8_t owned_{0}; friend std::ostream& operator << (std::ostream& os, const r_string& s) { @@ -134,10 +116,9 @@ namespace crow return os; } private: - void force(const char* s, uint32_t length) + void force(char* s, uint32_t length) { s_ = s; - length_ = length; owned_ = 1; } friend rvalue crow::json::load(const char* data, size_t size); @@ -171,6 +152,7 @@ namespace crow class rvalue { + static const int cached_bit = 2; static const int error_bit = 4; public: rvalue() noexcept : option_{error_bit} @@ -178,17 +160,11 @@ namespace crow rvalue(type t) noexcept : lsize_{}, lremain_{}, t_{t} {} - rvalue(type t, const char* s, const char* e) noexcept + rvalue(type t, char* s, char* e) noexcept : start_{s}, end_{e}, t_{t} {} - rvalue(type t, const char* s, const char* e, uint8_t option) noexcept - : start_{s}, - end_{e}, - t_{t}, - option_{option} - {} rvalue(const rvalue& r) : start_(r.start_), @@ -272,13 +248,79 @@ namespace crow return boost::lexical_cast(start_, end_-start_); } + void unescape() const + { + if (*(start_-1)) + { + char* head = start_; + char* tail = start_; + while(head != end_) + { + if (*head == '\\') + { + switch(*++head) + { + case '"': *tail++ = '"'; break; + case '\\': *tail++ = '\\'; break; + case '/': *tail++ = '/'; break; + case 'b': *tail++ = '\b'; break; + case 'f': *tail++ = '\f'; break; + case 'n': *tail++ = '\n'; break; + case 'r': *tail++ = '\r'; break; + case 't': *tail++ = '\t'; break; + case 'u': + { + auto from_hex = [](char c) + { + if (c >= 'a') + return c - 'a' + 10; + if (c >= 'A') + return c - 'A' + 10; + return c - '0'; + }; + unsigned int code = + (from_hex(head[1])<<12) + + (from_hex(head[2])<< 8) + + (from_hex(head[3])<< 4) + + from_hex(head[4]); + if (code >= 0x800) + { + *tail++ = 0b11100000 | (code >> 12); + *tail++ = 0b10000000 | ((code >> 6) & 0b111111); + *tail++ = 0b10000000 | (code & 0b111111); + } + else if (code >= 0x80) + { + *tail++ = 0b11000000 | (code >> 6); + *tail++ = 0b10000000 | (code & 0b111111); + } + else + { + *tail++ = code; + } + head += 4; + } + break; + } + } + else + *tail++ = *head; + head++; + } + end_ = tail; + *end_ = 0; + *(start_-1) = 0; + } + } + detail::r_string s() const { #ifndef CROW_JSON_NO_ERROR_CHECK if (t() != type::String) throw std::runtime_error("value is not string"); #endif - return detail::r_string{start_, (uint32_t)(end_-start_), has_escaping()}; + unescape(); + return detail::r_string{start_, end_}; } bool has(const char* str) const @@ -341,6 +383,8 @@ namespace crow size_t size() const { + if (t() == type::String) + return s().size(); #ifndef CROW_JSON_NO_ERROR_CHECK if (t() != type::Object && t() != type::List) throw std::runtime_error("value is not a container"); @@ -422,20 +466,18 @@ namespace crow return (option_&error_bit)!=0; } private: - bool has_escaping() const - { - return (option_&1)!=0; - } bool is_cached() const { - return (option_&2)!=0; + return (option_&cached_bit)!=0; } void set_cached() const { - option_ |= 2; + option_ |= cached_bit; } void copy_l(const rvalue& r) { + if (r.t() != type::Object && r.t() != type::List) + return; lsize_ = r.lsize_; lremain_ = 0; l_.reset(new rvalue[lsize_]); @@ -462,8 +504,8 @@ namespace crow lremain_ --; } - const char* start_; - const char* end_; + mutable char* start_; + mutable char* end_; detail::r_string key_; std::unique_ptr l_; uint32_t lsize_; @@ -471,7 +513,7 @@ namespace crow type t_; mutable uint8_t option_{0}; - friend rvalue load_nocopy_internal(const char* data, size_t size); + friend rvalue load_nocopy_internal(char* data, size_t size); friend rvalue load(const char* data, size_t size); friend std::ostream& operator <<(std::ostream& os, const rvalue& r) { @@ -505,7 +547,7 @@ namespace crow { if (!first) os << ','; - os << '"' << escape(r.key_) << '"'; + os << '"' << escape(x.key_) << "\":"; first = false; os << x; } @@ -516,6 +558,8 @@ namespace crow return os; } }; + namespace detail { + } bool operator == (const rvalue& l, const std::string& r) { @@ -561,12 +605,12 @@ namespace crow //inline rvalue decode(const std::string& s) //{ //} - inline rvalue load_nocopy_internal(const char* data, size_t size) + inline rvalue load_nocopy_internal(char* data, size_t size) { //static const char* escaped = "\"\\/\b\f\n\r\t"; struct Parser { - Parser(const char* data, size_t size) + Parser(char* data, size_t size) : data(data) { } @@ -588,7 +632,7 @@ namespace crow { if (crow_json_unlikely(!consume('"'))) return {}; - const char* start = data; + char* start = data; uint8_t has_escaping = 0; while(1) { @@ -598,19 +642,33 @@ namespace crow } else if (*data == '"') { + *data = 0; + *(start-1) = has_escaping; data++; - return {type::String, start, data-1, has_escaping}; + return {type::String, start, data-1}; } else if (*data == '\\') { has_escaping = 1; - // TODO data++; switch(*data) { case 'u': - data += 4; - // TODO + { + auto check = [](char c) + { + return + ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F'); + }; + if (!(check(*(data+1)) && + check(*(data+2)) && + check(*(data+3)) && + check(*(data+4)))) + return {}; + } + data += 5; break; case '"': case '\\': @@ -674,7 +732,7 @@ namespace crow rvalue decode_number() { - const char* start = data; + char* start = data; enum NumberParsingState { @@ -916,7 +974,7 @@ namespace crow return ret; } - const char* data; + char* data; }; return Parser(data, size).parse(); } @@ -1140,14 +1198,9 @@ namespace crow int count(const std::string& str) { if (t_ != type::Object) - reset(); - t_ = type::Object; + return 0; if (!o) - o = std::move( - std::unique_ptr< - std::unordered_map - >( - new std::unordered_map{})); + return 0; return o->count(str); } @@ -1224,7 +1277,13 @@ namespace crow case type::Null: out += "null"; break; case type::False: out += "false"; break; case type::True: out += "true"; break; - case type::Number: out += boost::lexical_cast(v.d); break; + case type::Number: + { + char outbuf[128]; + sprintf(outbuf, "%g", v.d); + out += outbuf; + } + break; case type::String: dump_string(v.s, out); break; case type::List: { diff --git a/mustache.h b/mustache.h index 3f40c0e..3fa002f 100644 --- a/mustache.h +++ b/mustache.h @@ -54,84 +54,210 @@ namespace crow parse(); } - std::string render(context& ctx) + private: + std::string tag_name(const Action& action) { - std::vector stack; - stack.emplace_back(&ctx); - auto tag_name = [&](const Action& action) - { - return body_.substr(action.start, action.end - action.start); - }; - auto find_context = [&](const std::string& name)->std::pair - { - for(auto it = stack.rbegin(); it != stack.rend(); ++it) - { - std::cerr << "finding " << name << " on " << (int)(*it)->t() << std::endl; - if ((*it)->t() == json::type::Object) - { - for(auto jt = (*it)->o->begin(); jt != (*it)->o->end(); ++jt) - { - std::cerr << '\t' << jt->first << ' ' << json::dump(jt->second) << std::endl; - } - if ((*it)->count(name)) - return {true, (**it)[name]}; - } - } - - static json::wvalue empty_str; - empty_str = ""; - return {false, empty_str}; - }; - int current = 0; - std::string ret; - while(current < fragments_.size()) + return body_.substr(action.start, action.end - action.start); + } + auto find_context(const std::string& name, const std::vector& stack)->std::pair + { + if (name == ".") + { + return {true, *stack.back()}; + } + int dotPosition = name.find("."); + if (dotPosition == (int)name.npos) + { + for(auto it = stack.rbegin(); it != stack.rend(); ++it) + { + if ((*it)->t() == json::type::Object) + { + if ((*it)->count(name)) + return {true, (**it)[name]}; + } + } + } + else + { + std::vector dotPositions; + dotPositions.push_back(-1); + while(dotPosition != (int)name.npos) + { + dotPositions.push_back(dotPosition); + dotPosition = name.find(".", dotPosition+1); + } + dotPositions.push_back(name.size()); + std::vector names; + names.reserve(dotPositions.size()-1); + for(int i = 1; i < (int)dotPositions.size(); i ++) + names.emplace_back(name.substr(dotPositions[i-1]+1, dotPositions[i]-dotPositions[i-1]-1)); + + for(auto it = stack.rbegin(); it != stack.rend(); ++it) + { + context* view = *it; + bool found = true; + for(auto jt = names.begin(); jt != names.end(); ++jt) + { + if (view->t() == json::type::Object && + view->count(*jt)) + { + view = &(*view)[*jt]; + } + else + { + found = false; + break; + } + } + if (found) + return {true, *view}; + } + + } + + static json::wvalue empty_str; + empty_str = ""; + return {false, empty_str}; + } + + void escape(const std::string& in, std::string& out) + { + out.reserve(out.size() + in.size()); + for(auto it = in.begin(); it != in.end(); ++it) + { + switch(*it) + { + case '&': out += "&"; break; + case '<': out += "<"; break; + case '>': out += ">"; break; + case '"': out += """; break; + case '\'': out += "'"; break; + case '/': out += "/"; break; + default: out += *it; break; + } + } + } + + void render_internal(int actionBegin, int actionEnd, std::vector& stack, std::string& out) + { + int current = actionBegin; + while(current < actionEnd) { - auto& fragment = fragments_[current]; - auto& action = actions_[current]; - ret += body_.substr(fragment.first, fragment.second-fragment.first); + auto& fragment = fragments_[current]; + auto& action = actions_[current]; + out.insert(out.size(), body_, fragment.first, fragment.second-fragment.first); switch(action.t) { - case ActionType::Ignore: - // do nothing - break; - case ActionType::Tag: - { - auto optional_ctx = find_context(tag_name(action)); - auto& ctx = optional_ctx.second; - switch(ctx.t()) - { - case json::type::Number: - ret += json::dump(ctx); - break; - case json::type::String: - ret += ctx.s; - break; - default: - throw std::runtime_error("not implemented tag type" + boost::lexical_cast((int)ctx.t())); - } - } - break; - case ActionType::OpenBlock: - { - std::cerr << tag_name(action) << std::endl; - auto optional_ctx = find_context(tag_name(action)); - std::cerr << optional_ctx.first << std::endl; - if (!optional_ctx.first) - current = action.pos; - auto& ctx = optional_ctx.second; - if (ctx.t() == json::type::Null || ctx.t() == json::type::False) - current = action.pos; - stack.push_back(&ctx); - break; - } - case ActionType::CloseBlock: - stack.pop_back(); - break; - default: - throw std::runtime_error("not implemented " + boost::lexical_cast((int)action.t)); + case ActionType::Ignore: + // do nothing + break; + case ActionType::UnescapeTag: + case ActionType::Tag: + { + auto optional_ctx = find_context(tag_name(action), stack); + auto& ctx = optional_ctx.second; + switch(ctx.t()) + { + case json::type::Number: + out += json::dump(ctx); + break; + case json::type::String: + if (action.t == ActionType::Tag) + escape(ctx.s, out); + else + out += ctx.s; + break; + default: + throw std::runtime_error("not implemented tag type" + boost::lexical_cast((int)ctx.t())); + } + } + break; + case ActionType::ElseBlock: + { + static context nullContext; + auto optional_ctx = find_context(tag_name(action), stack); + if (!optional_ctx.first) + { + stack.emplace_back(&nullContext); + break; + } + + auto& ctx = optional_ctx.second; + switch(ctx.t()) + { + case json::type::List: + if (ctx.l && !ctx.l->empty()) + current = action.pos; + else + stack.emplace_back(&nullContext); + break; + case json::type::False: + case json::type::Null: + stack.emplace_back(&nullContext); + break; + default: + current = action.pos; + break; + } + break; + } + case ActionType::OpenBlock: + { + auto optional_ctx = find_context(tag_name(action), stack); + if (!optional_ctx.first) + { + current = action.pos; + break; + } + + auto& ctx = optional_ctx.second; + switch(ctx.t()) + { + case json::type::List: + if (ctx.l) + for(auto it = ctx.l->begin(); it != ctx.l->end(); ++it) + { + stack.push_back(&*it); + render_internal(current+1, action.pos, stack, out); + stack.pop_back(); + } + current = action.pos; + break; + case json::type::Number: + case json::type::String: + case json::type::Object: + case json::type::True: + stack.push_back(&ctx); + break; + case json::type::False: + case json::type::Null: + current = action.pos; + break; + default: + throw std::runtime_error("{{#: not implemented context type: " + boost::lexical_cast((int)ctx.t())); + break; + } + break; + } + case ActionType::CloseBlock: + stack.pop_back(); + break; + default: + throw std::runtime_error("not implemented " + boost::lexical_cast((int)action.t)); } - current++; + current++; } + auto& fragment = fragments_[actionEnd]; + out.insert(out.size(), body_, fragment.first, fragment.second - fragment.first); + } + public: + std::string render(context& ctx) + { + std::vector stack; + stack.emplace_back(&ctx); + + std::string ret; + render_internal(0, fragments_.size()-1, stack, ret); return ret; } @@ -172,11 +298,15 @@ namespace crow { case '#': idx++; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; blockPositions.emplace_back(actions_.size()); actions_.emplace_back(ActionType::OpenBlock, idx, endIdx); break; case '/': idx++; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; { auto& matched = actions_[blockPositions.back()]; if (body_.compare(idx, endIdx-idx, @@ -192,15 +322,21 @@ namespace crow blockPositions.pop_back(); break; case '^': + idx++; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; blockPositions.emplace_back(actions_.size()); - actions_.emplace_back(ActionType::ElseBlock, idx+1, endIdx); + actions_.emplace_back(ActionType::ElseBlock, idx, endIdx); break; case '!': // do nothing action actions_.emplace_back(ActionType::Ignore, idx+1, endIdx); break; case '>': // partial - actions_.emplace_back(ActionType::Partial, idx+1, endIdx); + idx++; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; + actions_.emplace_back(ActionType::Partial, idx, endIdx); throw invalid_template_exception("{{>: partial not implemented: " + body_.substr(idx+1, endIdx-idx-1)); break; case '{': @@ -212,11 +348,15 @@ namespace crow { throw invalid_template_exception("{{{: }}} not matched"); } + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; actions_.emplace_back(ActionType::UnescapeTag, idx, endIdx); current++; break; case '&': idx ++; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; actions_.emplace_back(ActionType::UnescapeTag, idx, endIdx); break; case '=': @@ -256,10 +396,64 @@ namespace crow break; default: // normal tag case; + while(body_[idx] == ' ') idx++; + while(body_[endIdx-1] == ' ') endIdx--; actions_.emplace_back(ActionType::Tag, idx, endIdx); break; } } + + // removing standalones + for(int i = actions_.size()-2; i >= 0; i --) + { + if (actions_[i].t == ActionType::Tag || actions_[i].t == ActionType::UnescapeTag) + continue; + auto& fragment_before = fragments_[i]; + auto& fragment_after = fragments_[i+1]; + bool is_last_action = i == (int)actions_.size()-2; + bool all_space_before = true; + int j, k; + for(j = fragment_before.second-1;j >= fragment_before.first;j--) + { + if (body_[j] != ' ') + { + all_space_before = false; + break; + } + } + if (all_space_before && i > 0) + continue; + if (!all_space_before && body_[j] != '\n') + continue; + bool all_space_after = true; + for(k = fragment_after.first; k < (int)body_.size() && k < fragment_after.second; k ++) + { + if (body_[k] != ' ') + { + all_space_after = false; + break; + } + } + if (all_space_after && !is_last_action) + continue; + if (!all_space_after && + !( + body_[k] == '\n' + || + (body_[k] == '\r' && + k + 1 < (int)body_.size() && + body_[k+1] == '\n'))) + continue; + fragment_before.second = j+1; + if (!all_space_after) + { + if (body_[k] == '\n') + k++; + else + k += 2; + fragment_after.first = k; + } + } } std::vector> fragments_; diff --git a/template_test/test.py b/template_test/test.py index b492396..f99ca23 100755 --- a/template_test/test.py +++ b/template_test/test.py @@ -6,21 +6,20 @@ import subprocess for testfile in glob.glob("*.json"): testdoc = json.load(open(testfile)) for test in testdoc["tests"]: - if "partials" in test: - continue - if "partial" in test: - continue if "lambda" in test["data"]: continue - print testfile, test["name"] - print json.dumps(test["data"]) - print test["template"] + if "partials" in test: + #print testfile, test["name"] + continue open('data', 'w').write(json.dumps(test["data"])) open('template', 'w').write(test["template"]) ret = subprocess.check_output("./mustachetest") + print testfile, test["name"] if ret != test["expected"]: - print 'Expected:',(test["expected"]) - print 'Actual:',(ret) + print json.dumps(test["data"]) + print test["template"] + print 'Expected:',repr(test["expected"]) + print 'Actual:',repr(ret) + assert ret == test["expected"] os.unlink('data') os.unlink('template') - assert ret == test["expected"] diff --git a/unittest.cpp b/unittest.cpp index 680bf50..07bf2bf 100644 --- a/unittest.cpp +++ b/unittest.cpp @@ -30,14 +30,14 @@ void error_print(const A& a, Args...args) template void fail(Args...args) { error_print(args...);failed__ = true; } -#define ASSERT_TRUE(x) if (!(x)) fail("Assert fail: expected ", #x, " is true, at " __FILE__ ":",__LINE__) -#define ASSERT_EQUAL(a, b) if ((a) != (b)) fail("Assert fail: expected ", (a), " actual " , (b), ", " #a " == " #b ", at " __FILE__ ":",__LINE__) -#define ASSERT_NOTEQUAL(a, b) if ((a) == (b)) fail("Assert fail: not expected ", (a), ", " #a " != " #b ", at " __FILE__ ":",__LINE__) +#define ASSERT_TRUE(x) if (!(x)) fail(__FILE__ ":", __LINE__, ": Assert fail: expected ", #x, " is true, at " __FILE__ ":",__LINE__) +#define ASSERT_EQUAL(a, b) if ((a) != (b)) fail(__FILE__ ":", __LINE__, ": Assert fail: expected ", (a), " actual " , (b), ", " #a " == " #b ", at " __FILE__ ":",__LINE__) +#define ASSERT_NOTEQUAL(a, b) if ((a) == (b)) fail(__FILE__ ":", __LINE__, ": Assert fail: not expected ", (a), ", " #a " != " #b ", at " __FILE__ ":",__LINE__) #define ASSERT_THROW(x) \ try \ { \ x; \ - fail("Assert fail: exception should be thrown"); \ + fail(__FILE__ ":", __LINE__, ": Assert fail: exception should be thrown"); \ } \ catch(std::exception&) \ { \ @@ -329,7 +329,7 @@ TEST(json_read) ASSERT_EQUAL(false, x.has("mess")); ASSERT_THROW(x["mess"]); ASSERT_THROW(3 == x["message"]); - ASSERT_THROW(x["message"].size()); + ASSERT_EQUAL(12, x["message"].size()); std::string s = R"({"int":3, "ints" :[1,2,3,4,5] })"; auto y = json::load(s); @@ -352,6 +352,29 @@ TEST(json_read) } +TEST(json_read_unescaping) +{ + { + auto x = json::load(R"({"data":"\ud55c\n\t\r"})"); + if (!x) + { + fail("fail to parse"); + return; + } + ASSERT_EQUAL(6, x["data"].size()); + ASSERT_EQUAL("한\n\t\r", x["data"]); + } + { + // multiple r_string instance + auto x = json::load(R"({"data":"\ud55c\n\t\r"})"); + auto a = x["data"].s(); + auto b = x["data"].s(); + ASSERT_EQUAL(6, a.size()); + ASSERT_EQUAL(6, b.size()); + ASSERT_EQUAL(6, x["data"].size()); + } +} + TEST(json_write) { json::wvalue x; -- cgit v1.2.3-54-g00ecf