Improve overall parsing

This commit is contained in:
Loic Guegan 2022-01-24 15:29:22 +01:00
parent fd78f92863
commit 8c77f7a054
8 changed files with 372 additions and 93 deletions

View file

@ -9,6 +9,7 @@ set(PGNP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/includes) # For convenience
set(PGNP_INCLUDE_DIR ${PGNP_INCLUDE_DIR} PARENT_SCOPE) # To be used by other projects with add_subdirectory()
file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR})
configure_file(src/pgnp.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
include_directories(${PGNP_INCLUDE_DIR})
# Unit tests
enable_testing()

View file

@ -2,80 +2,274 @@
#include "pgnp.hpp"
#include <iostream>
#define IS_BLANK(c) (c==' ' || c=='\n' || c=='\t')
#define IS_EOF(loc) (loc>=pgn_content.size())
#define EOF_CHECK(loc) {if(IS_EOF(loc)) throw UnexpectedEOF();}
// Character-classification and bounds helpers used by the parser.
// Every macro argument is wrapped in parentheses so that a compound argument
// (for instance a conditional expression) expands with the intended precedence.

/// True when @a c is a space, a line feed or a tab.
#define IS_BLANK(c) ((c) == ' ' || (c) == '\n' || (c) == '\t')
/// True when @a c is a decimal digit ('0'..'9' are contiguous in C++).
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
/// True when @a loc is at or past the end of the `pgn_content` that is in scope
/// where the macro is expanded.
#define IS_EOF(loc) ((loc) >= pgn_content.size())
/// Throw UnexpectedEOF when @a loc is at or past the end of `pgn_content`.
#define EOF_CHECK(loc)                                                         \
  {                                                                            \
    if (IS_EOF(loc))                                                           \
      throw UnexpectedEOF();                                                   \
  }
namespace pgnp {
void PGN::FromFile(std::string filepath){
std::ifstream file(filepath);
std::string content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
FromString(content);
/// @brief Build an empty half-move: move number 0, white to play, no following
/// move and no variations.
///
/// `count` is zero-initialized because the parser only assigns it when a move
/// number is present in the text, while NestedDump() always prints it.
HalfMove::HalfMove() : count(0), isBlack(false), MainLine(NULL) {}
/// @brief Release every half-move reachable from this one.
///
/// Frees the variations attached to this move and the whole chain of moves
/// linked through MainLine (those nodes are allocated with `new` by the parser
/// and have no other owner).
HalfMove::~HalfMove() {
  for (auto *variation : variations) {
    delete variation;
  }
  // Walk the main line iteratively: each node is unlinked before being deleted
  // so its own destructor only has to release its variations. This keeps the
  // recursion depth independent from the length of the game.
  HalfMove *next = MainLine;
  MainLine = nullptr;
  while (next != nullptr) {
    HalfMove *following = next->MainLine;
    next->MainLine = nullptr;
    delete next;
    next = following;
  }
}
/// @brief Print @a m on standard output, then its variations, then the rest of
/// its main line.
/// @param m Half-move to print (dereferenced without a null check).
/// @param indent Nesting level; one space is written per level before the
/// move, and variations are printed one level deeper.
void HalfMove::NestedDump(HalfMove *m, int indent) {
  // Padding: one space per nesting level.
  for (int remaining = indent; remaining > 0; --remaining) {
    std::cout << " ";
  }

  std::cout << " " << " Move=" << m->move << " Count=" << m->count
            << " Comment=\"" << m->comment << "\"" << " IsBlack=" << m->isBlack
            << " Variations=" << m->variations.size() << std::endl;

  // Alternatives to this move come first, one level deeper.
  for (std::size_t idx = 0; idx < m->variations.size(); ++idx) {
    NestedDump(m->variations[idx], indent + 1);
  }

  // Then the continuation of the current line, at the same level.
  if (m->MainLine == NULL) {
    return;
  }
  NestedDump(m->MainLine, indent);
}
void PGN::FromString(std::string pgn_content){
this->pgn_content=pgn_content;
int loc=0;
while(!IS_EOF(loc)) {
char c=pgn_content[loc];
if(!IS_BLANK(c)){
switch (c) {
case '[':
loc=ParseNextTag(loc);
break;
}
/// @brief Print this half-move, its variations and the moves that follow it on
/// standard output, starting at nesting level 0.
void HalfMove::Dump() {
  const int top_level = 0;
  NestedDump(this, top_level);
}
int HalfMove::GetLength() {
int length = 0;
HalfMove *m = this;
while (m != NULL) {
length++;
m = m->MainLine;
}
return length;
}
/// @brief Release the parsed move tree, if any.
PGN::~PGN() {
  // Deleting a null pointer is a no-op, so no explicit guard is needed.
  delete moves;
}
/// @brief Read the whole file at @a filepath and parse it with FromString().
/// @param filepath Path of the PGN file to load.
void PGN::FromFile(std::string filepath) {
  std::ifstream input(filepath);
  // A default-constructed stream buffer iterator marks the end of the stream.
  const std::istreambuf_iterator<char> first(input);
  const std::istreambuf_iterator<char> last;
  FromString(std::string(first, last));
}
/// @brief Parse a PGN document held in memory.
///
/// Scans the text character by character: every '[' starts a tag that is
/// handed to ParseNextTag(), and the first digit found outside a tag starts
/// the move text, which is handed to ParseLine() and ends the scan.
///
/// @param pgn_content Full text of the PGN document (also stored in the
/// object).
// NOTE(review): `moves` is reset to NULL without releasing a tree built by a
// previous call — confirm whether parsing twice with one object is supported.
void PGN::FromString(std::string pgn_content) {
  this->pgn_content = pgn_content;
  moves = NULL;

  for (int pos = 0; !IS_EOF(pos); pos++) {
    const char current = pgn_content[pos];
    if (IS_BLANK(current)) {
      continue;
    }
    if (current == '[') {
      // ParseNextTag returns the location of the tag's last character; the
      // loop increment then steps past it.
      pos = ParseNextTag(pos);
    } else if (IS_DIGIT(current)) {
      moves = new HalfMove();
      pos = ParseLine(pos, moves);
      break; // The move text is the last section that gets parsed
    }
  }
}
/// @brief Perform a Seven Tag Roster compliance check.
///
/// Starting from the first "Event" tag, the parsed tags must be, in this exact
/// order: Event, Site, Date, Round, White, Black, Result.
///
/// @throws STRCheckFailed if "Event" is missing, if fewer than seven tags
/// remain from it, or if one of them is not the expected one.
void PGN::STRCheck() {
  static const char *const roster[] = {"Event", "Site",  "Date",  "Round",
                                       "White", "Black", "Result"};
  const std::size_t roster_size = sizeof(roster) / sizeof(roster[0]);

  // Locate the first Event tag
  std::size_t first = 0;
  while (first < tagkeys.size() && tagkeys[first] != "Event") {
    ++first;
  }

  // The whole roster has to fit between that tag and the end of the list
  if (first + (roster_size - 1) >= tagkeys.size()) {
    throw STRCheckFailed();
  }

  // Check tags one by one
  for (std::size_t offset = 0; offset < roster_size; ++offset) {
    if (tagkeys[first + offset] != roster[offset]) {
      throw STRCheckFailed();
    }
  }
}
bool PGN::HasTag(std::string key) {
auto tags = GetTagList();
return (std::find(tags.begin(), tags.end(), key) != tags.end());
}
int PGN::ParseLine(int loc, HalfMove *hm) {
// Goto next char
loc = NextNonBlank(loc);
EOF_CHECK(loc);
char c = pgn_content[loc];
// Check if we reach score entry (* or 1-0 or 0-1 or 1/2-1/2)
if (!IS_EOF(loc + 1)) {
char nc = pgn_content[loc + 1]; // Next c
if ((IS_DIGIT(c) && nc == '-') or (IS_DIGIT(c) && nc == '/')) {
return (loc);
}
}
// Parse (move number
if (IS_DIGIT(c)) {
std::string move_nb;
while (IS_DIGIT(c)) {
move_nb += c;
loc++;
c = pgn_content[loc];
EOF_CHECK(loc);
}
/*for (auto const& [key, val] : tags){
std::cout << key <<"="<<val<<std::endl;
}*/
hm->count = std::stoi(move_nb);
loc++;
EOF_CHECK(loc);
if (pgn_content[loc] == '.') {
hm->isBlack = true;
loc += 2; // Skip two dots
EOF_CHECK(loc);
}
} else {
hm->isBlack = true;
}
int PGN::ParseNextTag(int start_loc){
// Parse key
std::string key;
int keyloc=start_loc+1;
EOF_CHECK(keyloc);
char c=pgn_content[keyloc];
while(!IS_BLANK(c)){
key+=c;
keyloc++;
EOF_CHECK(keyloc);
c=pgn_content[keyloc];
}
// Parse the HalfMove
loc = NextNonBlank(loc);
EOF_CHECK(loc);
c = pgn_content[loc];
std::string move;
while (!IS_BLANK(c) && c != ')') {
move += c;
loc++;
c = pgn_content[loc];
EOF_CHECK(loc);
}
hm->move = move;
// Parse value
std::string value;
int valueloc=NextNonBlank(keyloc)+1;
EOF_CHECK(keyloc);
c=pgn_content[valueloc];
while(c!='"' or IS_EOF(valueloc)){
value+=c;
valueloc++;
EOF_CHECK(keyloc);
c=pgn_content[valueloc];
}
// Add tag
tags[key]=value;
return(valueloc+1); // +1 For the last char of the tag which is ']'
// Skip end of variation
if (c == ')') {
loc++;
return (loc);
}
int PGN::NextNonBlank(int loc){
char c=pgn_content[loc];
while(IS_BLANK(c)){
// Check for comment
loc = NextNonBlank(loc);
if (!IS_EOF(loc) && pgn_content[loc] == '{') {
loc++; // Skip '{'
c = pgn_content[loc];
while (c != '}') {
hm->comment += c;
loc++;
c=pgn_content[loc];
EOF_CHECK(loc);
c = pgn_content[loc];
}
return(loc);
loc++; // Skip '}'
}
}
// Check for variations
loc = NextNonBlank(loc);
while (!IS_EOF(loc) && pgn_content[loc] == '(') {
loc++; // Skip '('
HalfMove *var = new HalfMove;
loc = ParseLine(loc, var);
hm->variations.push_back(var);
loc++; // Skip ')'
}
// Parse next HalfMove
loc = NextNonBlank(loc);
if (!IS_EOF(loc)) {
HalfMove *next_hm = new HalfMove;
next_hm->count = hm->count;
loc = ParseLine(loc, next_hm);
// Check if move parsed successfully
if (next_hm->move.size() > 0) {
hm->MainLine = next_hm;
} else {
delete next_hm;
}
}
return (loc);
}
/// @brief Parse the tag starting at @a start_loc and record it.
///
/// The key is read from the character following @a start_loc up to the first
/// blank; the value is read between the pair of double quotes that follows.
/// The pair is stored in @a tags and the key is appended to @a tagkeys.
///
/// @param start_loc Location in pgn_content of the tag's opening character
/// (FromString() passes the location of '[').
/// @return Location right after the closing double quote, where the closing
/// ']' is expected.
/// @throws UnexpectedEOF if the content ends before the tag is complete.
int PGN::ParseNextTag(int start_loc) {
  // Parse key
  std::string key;
  int keyloc = start_loc + 1;
  EOF_CHECK(keyloc);
  char c = pgn_content[keyloc];
  while (!IS_BLANK(c)) {
    key += c;
    keyloc++;
    EOF_CHECK(keyloc);
    c = pgn_content[keyloc];
  }

  // Parse value
  std::string value;
  int valueloc = NextNonBlank(keyloc) + 1; // +1 skips the opening quote
  // Bounds are checked on valueloc (the location actually read), so that a
  // truncated or unterminated value raises UnexpectedEOF instead of reading
  // past the end of the content.
  EOF_CHECK(valueloc);
  c = pgn_content[valueloc];
  while (c != '"') {
    value += c;
    valueloc++;
    EOF_CHECK(valueloc);
    c = pgn_content[valueloc];
  }

  // Add tag
  tags[key] = value;
  tagkeys.push_back(key);

  // TODO: Check that the character after the closing quote is a ']'
  return (valueloc + 1); // +1 For the last char of the tag which is ']'
}
/// @brief Give access to the parsed moves.
/// @return Pointer to the first half-move of the main line, or NULL when the
/// last parsed content had no move text. The pointer is the one released by
/// the PGN destructor.
HalfMove *PGN::GetMoves() {
  HalfMove *const first_move = moves;
  return first_move;
}
/// @brief List the names of the parsed tags.
/// @return A copy of the tag names, in the order they were parsed.
std::vector<std::string> PGN::GetTagList() {
  std::vector<std::string> names(tagkeys);
  return names;
}
/// @brief Get the value of the tag named @a key.
/// @param key Tag name (exact match).
/// @return The tag value, or an empty string when no such tag was parsed.
std::string PGN::GetTagValue(std::string key) {
  // find() is used instead of operator[] so that asking for an unknown tag
  // does not insert an empty entry into the tag map.
  const auto entry = tags.find(key);
  if (entry == tags.end()) {
    return std::string();
  }
  return entry->second;
}
/// @brief Dump parsed PGN on standard output: a banner, one "key=value" line
/// per tag in parsing order, then the move tree when there is one.
void PGN::Dump() {
  std::cout << "---------- PGN DUMP ----------" << std::endl;

  std::cout << "Tags:" << std::endl;
  const std::vector<std::string> names = GetTagList();
  for (std::size_t idx = 0; idx < names.size(); ++idx) {
    std::cout << " " << names[idx] << "=" << GetTagValue(names[idx])
              << std::endl;
  }

  std::cout << "Moves:" << std::endl;
  if (moves == NULL) {
    return; // Nothing was parsed as move text
  }
  moves->Dump();
}
/// @brief Get the next non-blank char location starting from @a loc.
/// @param loc Location in pgn_content where the search starts.
/// @return Location of the first character at or after @a loc that is not a
/// space, line feed or tab; a location at or past the end of the content when
/// only blanks remain. A @a loc that is already past the end is returned
/// unchanged.
int PGN::NextNonBlank(int loc) {
  // The bounds test comes first so that pgn_content is never indexed at or
  // beyond its end, even when the caller hands in an out-of-range location.
  while (!IS_EOF(loc) && IS_BLANK(pgn_content[loc])) {
    loc++;
  }
  return (loc);
}
} // namespace pgnp

View file

@ -1,46 +1,75 @@
#include <unordered_map>
#include <string>
#include <fstream>
#include <streambuf>
#include <iostream>
#include <algorithm>
#include <exception>
#include <fstream>
#include <iostream>
#include <streambuf>
#include <string>
#include <unordered_map>
#include <vector>
namespace pgnp {
class HalfMove {
private:
/// @brief Recursive dump
void NestedDump(HalfMove *, int);
typedef struct HalfMove {
public:
int count;
bool isBlack;
std::string move;
std::string comment;
HalfMove *MainLine;
std::vector<HalfMove *> variations;
} HalfMove;
HalfMove();
~HalfMove();
int GetLength();
/// @brief Dump move and all its variations
void Dump();
};
class PGN {
private:
std::unordered_map<std::string, std::string> tags;
std::vector<std::string> tagkeys;
class PGN {
private:
std::unordered_map<std::string,std::string> tags;
HalfMove moves;
std::string pgn_content;
HalfMove *moves;
std::string pgn_content;
public:
void FromFile(std::string);
void FromString(std::string);
public:
~PGN();
void FromFile(std::string);
void FromString(std::string);
bool HasTag(std::string);
/// @brief Perform a Seven Tag Roster compliance check
void STRCheck();
/// @brief Dump parsed PGN
void Dump();
std::vector<std::string> GetTagList();
std::string GetTagValue(std::string);
HalfMove *GetMoves();
private:
/// @brief Populate @a tags by parsing the tag that starts at the location
/// given in argument
int ParseNextTag(int);
private:
/// @brief Get the next non-blank char location starting from location in
/// argument
int NextNonBlank(int);
/// @brief Populate @a tags with by parsing the one starting at location in argument
int ParseNextTag(int);
/// @brief Get the next non-blank char location starting from location in argument
int NextNonBlank(int);
};
int ParseLine(int, HalfMove *);
};
/// @brief Thrown by the parser when the PGN content ends in the middle of an
/// element.
struct UnexpectedEOF : public std::exception {
  /// @return A static, human-readable description of the error.
  const char *what() const noexcept override {
    return "Unexpected end of pgn file";
  }
};
/// @brief Thrown by PGN::STRCheck() when the parsed tags do not follow the
/// Seven Tag Roster.
struct STRCheckFailed : public std::exception {
  /// @return A static, human-readable description of the error.
  const char *what() const noexcept override {
    return "Seven Tag Roster compliance check failed";
  }
};
/// @brief Thrown by the parser when the PGN content ends in the middle of an
/// element.
struct UnexpectedEOF : public std::exception {
  /// @return A static, human-readable description of the error.
  const char *what() const noexcept override {
    return "Unexpected end of pgn file";
  }
};
}
} // namespace pgnp

View file

@ -5,4 +5,5 @@ file(COPY pgn_files DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
# Run tests
add_executable(pgnp_tests tests.cpp ./catch3/catch_amalgamated.cpp)
target_link_libraries(pgnp_tests pgnp)
add_test(PGNP_Tests pgnp_tests)

View file

@ -0,0 +1,15 @@
[Event "F/S Return Match"]
[Site "Belgrade, Serbia JUG"]
[Date "1992.11.04"]
[Round "29"]
[White "Fischer, Robert J."]
[Black "Spassky, Boris V."]
[Result "1/2-1/2"]
1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 4. Ba4 Nf6 5. O-O Be7 6. Re1 b5 7. Bb3 d6 8. c3
O-O 9. h3 Nb8 10. d4 Nbd7 11. c4 c6 12. cxb5 axb5 13. Nc3 Bb7 14. Bg5 b4 15.
Nb1 h6 16. Bh4 c5 17. dxe5 Nxe4 18. Bxe7 Qxe7 19. exd6 Qf6 20. Nbd2 Nxd6 21.
Nc4 Nxc4 22. Bxc4 Nb6 23. Ne5 Rae8 24. Bxf7+ Rxf7 25. Nxf7 Rxe1+ 26. Qxe1 Kxf7
27. Qe3 Qg5 28. Qxg5 hxg5 29. b3 Ke6 30. a3 Kd6 31. axb4 cxb4 32. Ra5 Nd5 33.
f3 Bc8 34. Kf2 Bf5 35. Ra7 g6 36. Ra6+ Kc5 37. Ke1 Nf4 38. g3 Nxh3 39. Kd2 Kb5
40. Rd6 Kc5 41. Ra6 Nf2 42. g4 Bd3 43. Re6 1/2-1/2

View file

@ -17,4 +17,4 @@
[Termination "Normal"]
[Annotator "lichess.org"]
1. g3 { A00 Hungarian Opening } d5 2. Bg2 Nf6 3. c4 c6 4. Nf3 h6?! { (0.22 → 0.74) Inaccuracy. dxc4 was best. } (4... dxc4 5. O-O Nbd7 6. Qc2 Nb6 7. a4 a5 8. Na3 Be6 9. Ne5) 5. e3?! { (0.74 → -0.15) Inaccuracy. O-O was best. } (5. O-O Bf5 6. d3 e6 7. cxd5 cxd5 8. Qb3 Qb6 9. Nd4 Bg6) 5... Bf5 6. Nc3?! { (-0.15 → -0.75) Inaccuracy. d4 was best. } (6. d4 Nbd7 7. b3 e6 8. O-O Be7 9. Nc3 O-O 10. Bb2 a5 11. Qe2 Ne4 12. Nxe4 Bxe4) 6... e6 7. d4 Be7 8. Qe2 O-O 9. a3 a5 10. Bd2?! { (-0.33 → -0.84) Inaccuracy. b3 was best. } (10. b3 Re8) 10... Bc2 11. O-O Bb3 12. c5?? { (-0.22 → -3.09) Blunder. cxd5 was best. } (12. cxd5) 12... Bc4 13. Qd1 Bxf1 14. Bxf1 b6 15. cxb6 Qxb6 16. Na4 Qa7 17. Rc1 Rc8 18. Ne5 c5 19. dxc5 Bxc5 20. Bb5? { (-2.12 → -4.00) Mistake. Rc2 was best. } (20. Rc2 Nbd7) 20... Ne4?? { (-4.00 → -1.02) Blunder. Qb7 was best. } (20... Qb7 21. Be8) 21. Kg2?! { (-1.02 → -2.00) Inaccuracy. b4 was best. } (21. b4) 21... Qb7 22. f3?? { (-2.13 → -7.56) Blunder. Bd3 was best. } (22. Bd3 Ba7) 22... Nxd2 23. Bc6 Nxc6 24. Nxc5 Qxb2 25. Ned3 Qxa3 26. Qxd2 Nb4 27. Nb2 Rc7 28. Kh3 Rac8 29. Nba4 Na6 30. Rc3 Qa1 31. Rc1 Qf6 32. Qxa5 Nxc5 33. Nb6 Nb3 { White resigns. } 0-1
1. g3 d5 2. Bg2 Nf6 3. c4 c6

View file

@ -0,0 +1,20 @@
[Event "Rated Rapid game"]
[Site "https://lichess.org/cMjillK3"]
[Date "2021.11.08"]
[White "UpSideGravity"]
[Black "manzerbredes"]
[Result "0-1"]
[UTCDate "2021.11.08"]
[UTCTime "11:06:47"]
[WhiteElo "1830"]
[BlackElo "1818"]
[WhiteRatingDiff "-45"]
[BlackRatingDiff "+13"]
[Variant "Standard"]
[TimeControl "600+5"]
[ECO "A00"]
[Opening "Hungarian Opening"]
[Termination "Normal"]
[Annotator "lichess.org"]
1. g3 { A00 Hungarian Opening } d5 2. Bg2 Nf6 3. c4 c6 4. Nf3 h6?! { (0.22 → 0.74) Inaccuracy. dxc4 was best. } (4... dxc4 5. O-O Nbd7 6. Qc2 Nb6 7. a4 a5 8. Na3 Be6 9. Ne5) 5. e3?! { (0.74 → -0.15) Inaccuracy. O-O was best. } (5. O-O Bf5 6. d3 e6 7. cxd5 cxd5 8. Qb3 Qb6 9. Nd4 Bg6) 5... Bf5 6. Nc3?! { (-0.15 → -0.75) Inaccuracy. d4 was best. } (6. d4 Nbd7 7. b3 e6 8. O-O Be7 9. Nc3 O-O 10. Bb2 a5 11. Qe2 Ne4 12. Nxe4 Bxe4) 6... e6 7. d4 Be7 8. Qe2 O-O 9. a3 a5 10. Bd2?! { (-0.33 → -0.84) Inaccuracy. b3 was best. } (10. b3 Re8) 10... Bc2 11. O-O Bb3 12. c5?? { (-0.22 → -3.09) Blunder. cxd5 was best. } (12. cxd5) 12... Bc4 13. Qd1 Bxf1 14. Bxf1 b6 15. cxb6 Qxb6 16. Na4 Qa7 17. Rc1 Rc8 18. Ne5 c5 19. dxc5 Bxc5 20. Bb5? { (-2.12 → -4.00) Mistake. Rc2 was best. } (20. Rc2 Nbd7) 20... Ne4?? { (-4.00 → -1.02) Blunder. Qb7 was best. } (20... Qb7 21. Be8) 21. Kg2?! { (-1.02 → -2.00) Inaccuracy. b4 was best. } (21. b4) 21... Qb7 22. f3?? { (-2.13 → -7.56) Blunder. Bd3 was best. } (22. Bd3 Ba7) 22... Nxd2 23. Bc6 Nxc6 24. Nxc5 Qxb2 25. Ned3 Qxa3 26. Qxd2 Nb4 27. Nb2 Rc7 28. Kh3 Rac8 29. Nba4 Na6 30. Rc3 Qa1 31. Rc1 Qf6 32. Qxa5 Nxc5 33. Nb6 Nb3 { White resigns. } 0-1

View file

@ -1,6 +1,25 @@
#include "pgnp.hpp"
#include <catch_amalgamated.hpp>
using namespace pgnp;
TEST_CASE( "DUMMY TEST", "[test]" ) {
REQUIRE( 1 == 1 );
// Loading this fixture must not throw, the Seven Tag Roster check is expected
// to reject it, and its main line must be 6 half-moves long.
TEST_CASE("Valid PGN", "[pgn1]") {
PGN pgn;
REQUIRE_NOTHROW(pgn.FromFile("pgn_files/valid/pgn1.pgn"));
REQUIRE_THROWS(pgn.STRCheck());
REQUIRE(pgn.GetMoves()->GetLength() == 6);
}
// Loading this fixture must not throw, the Seven Tag Roster check is expected
// to reject it, and its main line must be 66 half-moves long (variations are
// not part of the count returned by GetLength()).
TEST_CASE("Valid PGN", "[pgn2]") {
PGN pgn;
REQUIRE_NOTHROW(pgn.FromFile("pgn_files/valid/pgn2.pgn"));
REQUIRE_THROWS(pgn.STRCheck());
REQUIRE(pgn.GetMoves()->GetLength() == 66);
}
// Loading this fixture must not throw, it must pass the Seven Tag Roster
// check, and its main line must be 85 half-moves long.
TEST_CASE("Seven Tag Roster", "[pgn1]") {
PGN pgn;
REQUIRE_NOTHROW(pgn.FromFile("pgn_files/str/pgn1.pgn"));
REQUIRE_NOTHROW(pgn.STRCheck());
REQUIRE(pgn.GetMoves()->GetLength() == 85);
}