- Enable NAG parsing

- Add the LargeFileStream interface to handle large file
- Debug parsing
This commit is contained in:
Loic Guegan 2022-01-26 12:03:24 +01:00
parent 8a770f9133
commit bb914f047b
8 changed files with 205 additions and 71 deletions

View file

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
project(pgnp)
# Shared library
add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp)
add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp src/LargeFileStream.cpp)
# Includes
set(PGNP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/includes) # For convenience
@ -10,6 +10,7 @@ set(PGNP_INCLUDE_DIR ${PGNP_INCLUDE_DIR} PARENT_SCOPE) # To be used by other pro
file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR})
configure_file(src/PGN.hpp ${PGNP_INCLUDE_DIR}/pgnp.hpp COPYONLY)
configure_file(src/HalfMove.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
configure_file(src/LargeFileStream.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
include_directories(${PGNP_INCLUDE_DIR})

View file

@ -4,8 +4,10 @@ PGNP is a Portable Game Notation (PGN) parser. More details about the
PGN specification can be found [here](https://www.chessclub.com/help/PGN-spec).
# Features
- Basic PGN parsing (tags, move, comments, variations etc.)
- Basic PGN parsing (tags, move, comments, variations, NAG, etc.)
- Merged PGN files parsing (several games in one file)
- Handles very large files (several GB)
- Very efficient
# How to use it ?
PGNP can be used as a shared library in your project.

View file

@ -19,8 +19,8 @@ std::string HalfMove::NestedDump(HalfMove *m, int indent) {
ss << " "
<< " Move=" << m->move << " Count=" << m->count << " Comment=\""
<< m->comment << "\""
<< " IsBlack=" << m->isBlack << " Variations=" << m->variations.size()
<< std::endl;
<< " NAG=" << m->NAG << " IsBlack=" << m->isBlack
<< " Variations=" << m->variations.size() << std::endl;
for (auto *var : m->variations) {
ss << NestedDump(var, indent + 1);
@ -65,15 +65,15 @@ void HalfMove::Copy(HalfMove *copy) {
}
HalfMove *HalfMove::GetHalfMoveAt(int distance) {
HalfMove *tmp=this;
while(distance>0){
if(tmp==NULL){
HalfMove *tmp = this;
while (distance > 0) {
if (tmp == NULL) {
throw HalfMoveOutOfRange();
}
distance--;
tmp=tmp->MainLine;
tmp = tmp->MainLine;
}
return(tmp);
return (tmp);
}
} // namespace pgnp

View file

@ -21,6 +21,7 @@ public:
std::string move;
/// @brief Comment associated to the move
std::string comment;
std::string NAG;
/// @brief Next HalfMove link to this line
HalfMove *MainLine;
/// @brief Next HalfMove links to variation of this line

63
src/LargeFileStream.cpp Normal file
View file

@ -0,0 +1,63 @@
#include "LargeFileStream.hpp"
namespace pgnp {
using namespace std;
LargeFileStream::LargeFileStream()
: chuck_count(-1), last_read_size(0), last_loc(0), use_string(false),
eof(false) {}
void LargeFileStream::FromFile(std::string filepath) {
file.open(filepath);
ReadNextChunk();
}
void LargeFileStream::FromString(std::string content) {
use_string = true;
this->content = content;
}
/// @brief Load the next BUFFER_SIZE bytes of the file into the buffer.
///
/// Advances the chunk counter and records how many bytes were actually
/// obtained (fewer than BUFFER_SIZE when the end of the file is reached).
void LargeFileStream::ReadNextChunk() {
  chuck_count += 1;
  file.read(&buffer[0], BUFFER_SIZE);
  last_read_size = static_cast<long>(file.gcount());
}
/// @brief Forward-only, array-like access to the content.
///
/// @param loc Offset of the requested character; must not be lower than the
///            offset of the previous call
/// @return The character at @a loc, or '?' when @a loc is at or past the end
///         of the content (the EOF flag is set in that case)
/// @throws ReadToFar if EOF was already reached by a previous call
/// @throws BackwardRead if @a loc is lower than the last requested offset
char LargeFileStream::operator[](long loc) {
  // Perform various checks
  if (eof) {
    throw ReadToFar();
  }
  if (loc < last_loc) {
    throw BackwardRead();
  }
  last_loc = loc; // Keep track (loc is >= 0 from here on)

  // Shortcut the operator for string content
  if (use_string) {
    if (static_cast<std::string::size_type>(loc) >= content.size()) {
      eof = true;
      return ('?');
    }
    // Serve the real character (previously '?' was returned unconditionally)
    return (content[loc]);
  }

  // Go to the chunk that contains loc
  const long loc_chunk_count = loc / BUFFER_SIZE;
  while (chuck_count < loc_chunk_count) {
    ReadNextChunk();
  }
  const long offset = loc - (loc_chunk_count * BUFFER_SIZE);

  // The stream is in a failed state once a read came back short; offsets past
  // the bytes actually read are therefore beyond the end of the file
  if (!file && offset >= last_read_size) {
    eof = true;
    return ('?');
  }

  // Return character
  return buffer[offset];
}
/// @brief Tell whether a previous read already reached the end of the content.
///
/// @param loc Currently unused: the answer only reflects the EOF flag set by
///            operator[], not the position passed in
/// @return true once operator[] has flagged EOF
bool LargeFileStream::IsEOF(long loc) {
  (void)loc; // Not consulted
  return eof;
}
} // namespace pgnp

52
src/LargeFileStream.hpp Normal file
View file

@ -0,0 +1,52 @@
#ifndef PGNP_LARGEFILESTREAM_HPP
#define PGNP_LARGEFILESTREAM_HPP

/// Size in bytes of the in-memory chunk buffer (512 KiB)
#define BUFFER_SIZE (1024 * 1024 / 2)

#include <exception>
#include <fstream>
#include <iostream>
#include <string>

namespace pgnp {

// NOTE(review): a using-directive in a header leaks into every includer. It
// is kept because other code in namespace pgnp may rely on it; consider
// removing it once all users are std::-qualified.
using namespace std;

/**
 * Forward-only character stream over a file (read chunk by chunk, so the
 * whole file never has to be in memory) or over an in-memory string.
 */
class LargeFileStream {
  /// @brief File to load
  std::ifstream file;
  /// @brief In memory buffer holding the current chunk
  char buffer[BUFFER_SIZE];
  /// @brief Number of chunks read minus 1 (-1 until the first chunk is read)
  long chuck_count;
  /// @brief Number of bytes read during the last file access
  long last_read_size;
  /// @brief Keep track of the file offset (to prevent backward read)
  long last_loc;
  /// @brief Use a string as file content
  std::string content;
  /// @brief Used to shortcut some methods when reading from a string
  bool use_string;
  /// @brief End Of File ?
  bool eof;

  /// @brief Load the next chunk of data from disk to memory
  void ReadNextChunk();

public:
  LargeFileStream();
  /// @brief Open @a filepath and load its first chunk
  void FromFile(std::string filepath);
  /// @brief Emulate file access with a string
  void FromString(std::string content);
  /**
   * @brief Allow array like, forward-only access to the content
   * @throws ReadToFar if the end of the content was already reached
   * @throws BackwardRead if @a loc is lower than the last requested offset
   */
  char operator[](long loc);
  /// @brief Check if we reached the EOF (reflects the flag set by
  /// operator[]; @a loc is currently not consulted)
  bool IsEOF(long loc);

  // Various Exceptions

  /// @brief Raised when a lower offset than the previous one is requested
  struct BackwardRead : public std::exception {
    const char *what() const noexcept override {
      return "LargeFileStream cannot read backward";
    }
  };

  /// @brief Raised when reading after the end of the content was reached
  struct ReadToFar : public std::exception {
    const char *what() const noexcept override {
      return "You reach the end of the file";
    }
  };
};

} // namespace pgnp

#endif // PGNP_LARGEFILESTREAM_HPP

View file

@ -7,7 +7,7 @@
#define IS_DIGIT(c) \
(c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || \
c == '6' || c == '7' || c == '8' || c == '9')
#define IS_EOF(loc) ((loc) >= pgn_content.size())
#define IS_EOF(loc) (pgn_content.IsEOF(loc))
#define EOF_CHECK(loc) \
{ \
if (IS_EOF(loc)) \
@ -25,17 +25,10 @@ PGN::~PGN() {
std::string PGN::GetResult() { return (result); }
void PGN::FromFile(std::string filepath) {
std::ifstream inFile;
inFile.open(filepath);
std::stringstream strStream;
strStream << inFile.rdbuf();
this->pgn_content = strStream.str();
}
void PGN::FromFile(std::string filepath) { pgn_content.FromFile(filepath); }
void PGN::FromString(std::string pgn_content) {
this->pgn_content = pgn_content;
this->pgn_content.FromString(pgn_content);
}
void PGN::ParseNextGame() {
@ -46,20 +39,26 @@ void PGN::ParseNextGame() {
result = "";
tagkeys.clear();
tags.clear();
moves = new HalfMove();
// Search for new game
if (IS_EOF(LastGameEndLoc)) {
throw NoGameFound();
}
int loc = NextNonBlank(LastGameEndLoc);
if (IS_EOF(loc)) {
throw NoGameFound();
}
// Parse game
while (!IS_EOF(loc)) {
char c = pgn_content[loc];
if (!IS_BLANK(c)) {
if (c == '[') {
loc = ParseNextTag(loc);
} else if (IS_DIGIT(c)) {
loc = ParseHalfMove(loc, moves);
LastGameEndLoc = loc + 1; // Next game start 1 char after the last one
LastGameEndLoc = ParseHalfMove(loc, moves);
break;
} else if (c == '{') {
loc = ParseComment(loc, moves);
@ -68,6 +67,7 @@ void PGN::ParseNextGame() {
}
loc++;
}
if (result.size() <= 0) {
throw InvalidGameResult();
}
@ -102,7 +102,7 @@ bool PGN::HasTag(std::string key) {
return (std::find(tags.begin(), tags.end(), key) != tags.end());
}
int PGN::ParseComment(int loc, HalfMove *hm) {
long PGN::ParseComment(long loc, HalfMove *hm) {
// Goto next char
loc = NextNonBlank(loc);
EOF_CHECK(loc);
@ -123,42 +123,39 @@ int PGN::ParseComment(int loc, HalfMove *hm) {
return (loc);
}
int PGN::ParseHalfMove(int loc, HalfMove *hm) {
long PGN::ParseHalfMove(long loc, HalfMove *hm) {
// Goto next char
loc = NextNonBlank(loc);
EOF_CHECK(loc);
char c = pgn_content[loc];
// Check if we reach score entry (* or 1-0 or 0-1 or 1/2-1/2)
if (!IS_EOF(loc + 1)) {
char nc = pgn_content[loc + 1]; // Next c
if ((IS_DIGIT(c) && nc == '-') or (IS_DIGIT(c) && nc == '/') or c == '*') {
if (c == '*') {
result = "*";
} else if (nc == '-') {
if (c == '1') {
result = "1-0";
loc += 2;
} else {
result = "0-1";
loc += 2;
}
} else {
result = "1/2-1/2";
loc += 6;
}
return (loc);
}
if (c == '*') {
result = "*";
return (loc + 1);
}
// Parse (move number
// Parse move number and check if end of game
if (IS_DIGIT(c)) {
std::string move_nb;
char first_digit = c;
while (IS_DIGIT(c)) {
move_nb += c;
loc++;
c = pgn_content[loc];
EOF_CHECK(loc);
if (c == '/' || c == '-') {
if (c == '/') {
result = "1/2-1/2";
return (loc + 6);
} else if (first_digit == '1') {
result = "1-0";
return (loc + 2);
} else {
result = "0-1";
return (loc + 2);
}
}
}
hm->count = std::stoi(move_nb);
loc++;
@ -172,10 +169,6 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
hm->isBlack = true;
}
// Parse comment entries (various comment could appear during HalfMove
// parsing)
loc = ParseComment(loc, hm);
// Parse the HalfMove
loc = NextNonBlank(loc);
EOF_CHECK(loc);
@ -189,13 +182,21 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
}
hm->move = move;
// Parse comment
loc = ParseComment(loc, hm);
// Skip end of variation
if (c == ')') {
// Check for NAG
loc = NextNonBlank(loc);
EOF_CHECK(loc);
c = pgn_content[loc];
if (c == '$') {
hm->NAG += c;
loc++;
return (loc);
EOF_CHECK(loc);
c = pgn_content[loc];
while (IS_DIGIT(c)) {
hm->NAG += c;
loc++;
EOF_CHECK(loc);
c = pgn_content[loc];
}
}
// Parse comment
@ -209,10 +210,19 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
loc = ParseHalfMove(loc, var);
hm->variations.push_back(var);
loc++; // Skip ')'
// Goto next var
loc = NextNonBlank(loc);
EOF_CHECK(loc);
c = pgn_content[loc];
}
// Parse comment
loc = ParseComment(loc, hm);
// Skip end of variation
loc = NextNonBlank(loc);
EOF_CHECK(loc);
c = pgn_content[loc];
if (c == ')') {
return (loc);
}
// Parse next HalfMove
loc = NextNonBlank(loc);
@ -231,10 +241,10 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
return (loc);
}
int PGN::ParseNextTag(int start_loc) {
long PGN::ParseNextTag(long start_loc) {
// Parse key
std::string key;
int keyloc = start_loc + 1;
long keyloc = start_loc + 1;
EOF_CHECK(keyloc);
char c = pgn_content[keyloc];
while (!IS_BLANK(c)) {
@ -246,7 +256,7 @@ int PGN::ParseNextTag(int start_loc) {
// Parse value
std::string value;
int valueloc = NextNonBlank(keyloc) + 1;
long valueloc = NextNonBlank(keyloc) + 1;
EOF_CHECK(keyloc);
c = pgn_content[valueloc];
while (c != '"' or IS_EOF(valueloc)) {
@ -294,14 +304,16 @@ std::string PGN::Dump() {
return (ss.str());
}
int PGN::NextNonBlank(int loc) {
long PGN::NextNonBlank(long loc) {
char c = pgn_content[loc];
while (IS_BLANK(c)) {
loc++;
if (IS_EOF(loc))
if (IS_EOF(loc)) {
return (loc);
}
c = pgn_content[loc];
}
return (loc);
}

View file

@ -1,4 +1,5 @@
#include "HalfMove.hpp"
#include "LargeFileStream.hpp"
#include <algorithm>
#include <exception>
#include <fstream>
@ -17,9 +18,10 @@ private:
/// @brief Contains the parsed PGN moves
HalfMove *moves;
/// @brief Contains the PGN data
std::string pgn_content;
/// @brief Contains the location of the end of the last parsed game (1 PGN file may have multiple games)
int LastGameEndLoc;
LargeFileStream pgn_content;
/// @brief Contains the location of the end of the last parsed game (1 PGN
/// file may have multiple games)
long LastGameEndLoc;
public:
PGN();
@ -27,8 +29,9 @@ public:
void FromFile(std::string);
void FromString(std::string);
/**
* Parse the next available game. Note that it raises a @a NoGameFound exception if no more game is available.
* A call to this method flush all the last parsed game data. Be careful.
* Parse the next available game. Note that it raises a @a NoGameFound
* exception if no more games are available. A call to this method flushes
* all the data of the previously parsed game. Be careful.
*/
void ParseNextGame();
/// @brief Check if PGN contains a specific tag
@ -49,13 +52,13 @@ public:
private:
/// @brief Populate @a tags with by parsing the one starting at location in
/// argument
int ParseNextTag(int);
long ParseNextTag(long);
/// @brief Get the next non-blank char location starting from location in
/// argument
int NextNonBlank(int);
long NextNonBlank(long);
/// @brief Parse a HalfMove at a specific location into @a pgn_content
int ParseHalfMove(int, HalfMove *);
int ParseComment(int,HalfMove *);
long ParseHalfMove(long, HalfMove *);
long ParseComment(long, HalfMove *);
};
struct UnexpectedEOF : public std::exception {
@ -76,7 +79,7 @@ struct NoGameFound : public std::exception {
struct UnexpectedCharacter : public std::exception {
std::string msg;
UnexpectedCharacter(char actual, char required, int loc) {
UnexpectedCharacter(char actual, char required, long loc) {
std::stringstream ss;
ss << "Expected \'" << required << "\' at location " << loc
<< " but read \'" << actual << "\'";