diff --git a/CMakeLists.txt b/CMakeLists.txt index 2381751..f493c30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(pgnp) # Shared library -add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp) +add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp src/LargeFileStream.cpp) # Includes set(PGNP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/includes) # For conveniance @@ -10,6 +10,7 @@ set(PGNP_INCLUDE_DIR ${PGNP_INCLUDE_DIR} PARENT_SCOPE) # To be used by other pro file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR}) configure_file(src/PGN.hpp ${PGNP_INCLUDE_DIR}/pgnp.hpp COPYONLY) configure_file(src/HalfMove.hpp ${PGNP_INCLUDE_DIR} COPYONLY) +configure_file(src/LargeFileStream.hpp ${PGNP_INCLUDE_DIR} COPYONLY) include_directories(${PGNP_INCLUDE_DIR}) diff --git a/README.md b/README.md index 99fbdb0..af417e3 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,10 @@ PGNP is a Portable Game Notation (PGN) parser. More details about the PGN specification can be found [here](https://www.chessclub.com/help/PGN-spec). # Features -- Basic PGN parsing (tags, move, comments, variations etc.) +- Basic PGN parsing (tags, move, comments, variations, NAG, etc.) - Merged PGN files parsing (several games in one file) +- Handle very large files (several GB) +- Very efficient # How to use it ? PGNP can be used as a shared library in your project. 
diff --git a/src/HalfMove.cpp b/src/HalfMove.cpp index 290587a..7e94ef9 100644 --- a/src/HalfMove.cpp +++ b/src/HalfMove.cpp @@ -19,8 +19,8 @@ std::string HalfMove::NestedDump(HalfMove *m, int indent) { ss << " " << " Move=" << m->move << " Count=" << m->count << " Comment=\"" << m->comment << "\"" - << " IsBlack=" << m->isBlack << " Variations=" << m->variations.size() - << std::endl; + << " NAG=" << m->NAG << " IsBlack=" << m->isBlack + << " Variations=" << m->variations.size() << std::endl; for (auto *var : m->variations) { ss << NestedDump(var, indent + 1); @@ -65,15 +65,15 @@ void HalfMove::Copy(HalfMove *copy) { } HalfMove *HalfMove::GetHalfMoveAt(int distance) { - HalfMove *tmp=this; - while(distance>0){ - if(tmp==NULL){ + HalfMove *tmp = this; + while (distance > 0) { + if (tmp == NULL) { throw HalfMoveOutOfRange(); } distance--; - tmp=tmp->MainLine; + tmp = tmp->MainLine; } - return(tmp); + return (tmp); } } // namespace pgnp \ No newline at end of file diff --git a/src/HalfMove.hpp b/src/HalfMove.hpp index e049571..8edea93 100644 --- a/src/HalfMove.hpp +++ b/src/HalfMove.hpp @@ -21,6 +21,7 @@ public: std::string move; /// @brief Comment associated to the move std::string comment; + std::string NAG; /// @brief Next HalfMove link to this line HalfMove *MainLine; /// @brief Next HalfMove links to variation of this line diff --git a/src/LargeFileStream.cpp b/src/LargeFileStream.cpp new file mode 100644 index 0000000..547415b --- /dev/null +++ b/src/LargeFileStream.cpp @@ -0,0 +1,63 @@ +#include "LargeFileStream.hpp" + +namespace pgnp { +using namespace std; + +LargeFileStream::LargeFileStream() + : chuck_count(-1), last_read_size(0), last_loc(0), use_string(false), + eof(false) {} + +void LargeFileStream::FromFile(std::string filepath) { + file.open(filepath); + ReadNextChunk(); +} + +void LargeFileStream::FromString(std::string content) { + use_string = true; + this->content = content; +} + +void LargeFileStream::ReadNextChunk() { + chuck_count++; + 
file.read(buffer, BUFFER_SIZE); + last_read_size = file.gcount(); +} + +char LargeFileStream::operator[](long loc) { + // Perform various checks + if (eof) { + throw ReadToFar(); + } + if (loc < last_loc) { + throw BackwardRead(); + } + last_loc = loc; // Keep track + + // Shortcut the operator for string content + if (use_string) { + if (loc >= content.size()) { + eof = true; + } + return ('?'); + } + + // Goto the right memory chuck + long loc_chunk_count = loc / BUFFER_SIZE; + while (chuck_count < loc_chunk_count) { + ReadNextChunk(); + } + long offset = loc - (loc_chunk_count * BUFFER_SIZE); + + // Ensure for EOF + if (!file && offset >= last_read_size) { + eof = true; + return ('?'); + } + + // Return character + return buffer[offset]; +} + +bool LargeFileStream::IsEOF(long loc) { return (eof); } + +} // namespace pgnp \ No newline at end of file diff --git a/src/LargeFileStream.hpp b/src/LargeFileStream.hpp new file mode 100644 index 0000000..6ac87e8 --- /dev/null +++ b/src/LargeFileStream.hpp @@ -0,0 +1,52 @@ +#define BUFFER_SIZE (1024 * 1024 / 2) + +#include +#include +#include + +namespace pgnp { +using namespace std; + +class LargeFileStream { + /// @brief File to load + ifstream file; + /// @brief In memory buffer + char buffer[BUFFER_SIZE]; + /// @brief Number of chuck read minus 1 + long chuck_count; + /// @brief Number of byte read during the last file access + long last_read_size; + /// @brief Keep track of the file offset (to prevent backward read) + long last_loc; + /// @brief Use a string as file content + std::string content; + /// @brief Use to shortcut some methods + bool use_string; + /// @brief End Of File ? 
+ bool eof; + + /// @brief Load the next chuck of data from disk to memory + void ReadNextChunk(); + +public: + LargeFileStream(); + void FromFile(std::string filepath); + /// @brief Emulate file access with a string + void FromString(std::string content); + /// @brief Allow array like access to the file + char operator[](long loc); + /// @brief Check if we reach the EOF + bool IsEOF(long loc); + + // Various Exceptions + struct BackwardRead : public std::exception { + const char *what() const throw() { + return "LargeFileStream cannot read backward"; + } + }; + struct ReadToFar : public std::exception { + const char *what() const throw() { return "You reach the end of the file"; } + }; +}; + +} // namespace pgnp \ No newline at end of file diff --git a/src/PGN.cpp b/src/PGN.cpp index cd4bd96..953f225 100644 --- a/src/PGN.cpp +++ b/src/PGN.cpp @@ -7,7 +7,7 @@ #define IS_DIGIT(c) \ (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || \ c == '6' || c == '7' || c == '8' || c == '9') -#define IS_EOF(loc) ((loc) >= pgn_content.size()) +#define IS_EOF(loc) (pgn_content.IsEOF(loc)) #define EOF_CHECK(loc) \ { \ if (IS_EOF(loc)) \ @@ -25,17 +25,10 @@ PGN::~PGN() { std::string PGN::GetResult() { return (result); } -void PGN::FromFile(std::string filepath) { - std::ifstream inFile; - inFile.open(filepath); - std::stringstream strStream; - strStream << inFile.rdbuf(); - - this->pgn_content = strStream.str(); -} +void PGN::FromFile(std::string filepath) { pgn_content.FromFile(filepath); } void PGN::FromString(std::string pgn_content) { - this->pgn_content = pgn_content; + this->pgn_content.FromString(pgn_content); } void PGN::ParseNextGame() { @@ -46,20 +39,26 @@ void PGN::ParseNextGame() { result = ""; tagkeys.clear(); tags.clear(); - moves = new HalfMove(); + + // Search for new game + if (IS_EOF(LastGameEndLoc)) { + throw NoGameFound(); + } int loc = NextNonBlank(LastGameEndLoc); + if (IS_EOF(loc)) { throw NoGameFound(); } + + // Parse game while 
(!IS_EOF(loc)) { char c = pgn_content[loc]; if (!IS_BLANK(c)) { if (c == '[') { loc = ParseNextTag(loc); } else if (IS_DIGIT(c)) { - loc = ParseHalfMove(loc, moves); - LastGameEndLoc = loc + 1; // Next game start 1 char after the last one + LastGameEndLoc = ParseHalfMove(loc, moves); break; } else if (c == '{') { loc = ParseComment(loc, moves); @@ -68,6 +67,7 @@ void PGN::ParseNextGame() { } loc++; } + if (result.size() <= 0) { throw InvalidGameResult(); } @@ -102,7 +102,7 @@ bool PGN::HasTag(std::string key) { return (std::find(tags.begin(), tags.end(), key) != tags.end()); } -int PGN::ParseComment(int loc, HalfMove *hm) { +long PGN::ParseComment(long loc, HalfMove *hm) { // Goto next char loc = NextNonBlank(loc); EOF_CHECK(loc); @@ -123,42 +123,39 @@ int PGN::ParseComment(int loc, HalfMove *hm) { return (loc); } -int PGN::ParseHalfMove(int loc, HalfMove *hm) { +long PGN::ParseHalfMove(long loc, HalfMove *hm) { // Goto next char loc = NextNonBlank(loc); EOF_CHECK(loc); char c = pgn_content[loc]; // Check if we reach score entry (* or 1-0 or 0-1 or 1/2-1/2) - if (!IS_EOF(loc + 1)) { - char nc = pgn_content[loc + 1]; // Next c - if ((IS_DIGIT(c) && nc == '-') or (IS_DIGIT(c) && nc == '/') or c == '*') { - if (c == '*') { - result = "*"; - } else if (nc == '-') { - if (c == '1') { - result = "1-0"; - loc += 2; - } else { - result = "0-1"; - loc += 2; - } - } else { - result = "1/2-1/2"; - loc += 6; - } - return (loc); - } + if (c == '*') { + result = "*"; + return (loc + 1); } - // Parse (move number + // Parse move number and check if end of game if (IS_DIGIT(c)) { std::string move_nb; + char first_digit = c; while (IS_DIGIT(c)) { move_nb += c; loc++; c = pgn_content[loc]; EOF_CHECK(loc); + if (c == '/' || c == '-') { + if (c == '/') { + result = "1/2-1/2"; + return (loc + 6); + } else if (first_digit == '1') { + result = "1-0"; + return (loc + 2); + } else { + result = "0-1"; + return (loc + 2); + } + } } hm->count = std::stoi(move_nb); loc++; @@ -172,10 +169,6 @@ 
int PGN::ParseHalfMove(int loc, HalfMove *hm) { hm->isBlack = true; } - // Parse comment entries (various comment could appear during HalfMove - // parsing) - loc = ParseComment(loc, hm); - // Parse the HalfMove loc = NextNonBlank(loc); EOF_CHECK(loc); @@ -189,13 +182,21 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) { } hm->move = move; - // Parse comment - loc = ParseComment(loc, hm); - - // Skip end of variation - if (c == ')') { + // Check for NAG + loc = NextNonBlank(loc); + EOF_CHECK(loc); + c = pgn_content[loc]; + if (c == '$') { + hm->NAG += c; loc++; - return (loc); + EOF_CHECK(loc); + c = pgn_content[loc]; + while (IS_DIGIT(c)) { + hm->NAG += c; + loc++; + EOF_CHECK(loc); + c = pgn_content[loc]; + } } // Parse comment @@ -209,10 +210,19 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) { loc = ParseHalfMove(loc, var); hm->variations.push_back(var); loc++; // Skip ')' + // Goto next var + loc = NextNonBlank(loc); + EOF_CHECK(loc); + c = pgn_content[loc]; } - // Parse comment - loc = ParseComment(loc, hm); + // Skip end of variation + loc = NextNonBlank(loc); + EOF_CHECK(loc); + c = pgn_content[loc]; + if (c == ')') { + return (loc); + } // Parse next HalfMove loc = NextNonBlank(loc); @@ -231,10 +241,10 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) { return (loc); } -int PGN::ParseNextTag(int start_loc) { +long PGN::ParseNextTag(long start_loc) { // Parse key std::string key; - int keyloc = start_loc + 1; + long keyloc = start_loc + 1; EOF_CHECK(keyloc); char c = pgn_content[keyloc]; while (!IS_BLANK(c)) { @@ -246,7 +256,7 @@ int PGN::ParseNextTag(int start_loc) { // Parse value std::string value; - int valueloc = NextNonBlank(keyloc) + 1; + long valueloc = NextNonBlank(keyloc) + 1; EOF_CHECK(keyloc); c = pgn_content[valueloc]; while (c != '"' or IS_EOF(valueloc)) { @@ -294,14 +304,16 @@ std::string PGN::Dump() { return (ss.str()); } -int PGN::NextNonBlank(int loc) { +long PGN::NextNonBlank(long loc) { char c = pgn_content[loc]; while (IS_BLANK(c)) { 
loc++; - if (IS_EOF(loc)) + if (IS_EOF(loc)) { return (loc); + } c = pgn_content[loc]; } + return (loc); } diff --git a/src/PGN.hpp b/src/PGN.hpp index c4e27d0..7e4d0e2 100644 --- a/src/PGN.hpp +++ b/src/PGN.hpp @@ -1,4 +1,5 @@ #include "HalfMove.hpp" +#include "LargeFileStream.hpp" #include #include #include @@ -17,9 +18,10 @@ private: /// @brief Contains the parsed PGN moves HalfMove *moves; /// @brief Contains the PGN data - std::string pgn_content; - /// @brief Contains the location of the end of the last parsed game (1 PGN file may have multiple games) - int LastGameEndLoc; + LargeFileStream pgn_content; + /// @brief Contains the location of the end of the last parsed game (1 PGN + /// file may have multiple games) + long LastGameEndLoc; public: PGN(); @@ -27,8 +29,9 @@ public: void FromFile(std::string); void FromString(std::string); /** - * Parse the next available game. Note that it raises a @a NoGameFound exception if no more game is available. - * A call to this method flush all the last parsed game data. Be careful. + * Parse the next available game. Note that it raises a @a NoGameFound + * exception if no more game is available. A call to this method flush all the + * last parsed game data. Be careful. 
*/ void ParseNextGame(); /// @brief Check if PGN contains a specific tag @@ -49,13 +52,13 @@ public: private: /// @brief Populate @a tags with by parsing the one starting at location in /// argument - int ParseNextTag(int); + long ParseNextTag(long); /// @brief Get the next non-blank char location starting from location in /// argument - int NextNonBlank(int); + long NextNonBlank(long); /// @brief Parse a HalfMove at a specific location into @a pgn_content - int ParseHalfMove(int, HalfMove *); - int ParseComment(int,HalfMove *); + long ParseHalfMove(long, HalfMove *); + long ParseComment(long, HalfMove *); }; struct UnexpectedEOF : public std::exception { @@ -76,7 +79,7 @@ struct NoGameFound : public std::exception { struct UnexpectedCharacter : public std::exception { std::string msg; - UnexpectedCharacter(char actual, char required, int loc) { + UnexpectedCharacter(char actual, char required, long loc) { std::stringstream ss; ss << "Expected \'" << required << "\' at location " << loc << " but read \'" << actual << "\'";