mirror of
https://gitlab.com/manzerbredes/pgnp.git
synced 2025-04-05 17:46:25 +02:00
- Enable NAG parsing
- Add the LargeFileStream interface to handle large file - Debug parsing
This commit is contained in:
parent
8a770f9133
commit
bb914f047b
8 changed files with 205 additions and 71 deletions
|
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
|
|||
project(pgnp)
|
||||
|
||||
# Shared library
|
||||
add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp)
|
||||
add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp src/LargeFileStream.cpp)
|
||||
|
||||
# Includes
|
||||
set(PGNP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/includes) # For convenience
|
||||
|
@ -10,6 +10,7 @@ set(PGNP_INCLUDE_DIR ${PGNP_INCLUDE_DIR} PARENT_SCOPE) # To be used by other pro
|
|||
file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR})
|
||||
configure_file(src/PGN.hpp ${PGNP_INCLUDE_DIR}/pgnp.hpp COPYONLY)
|
||||
configure_file(src/HalfMove.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
|
||||
configure_file(src/LargeFileStream.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
|
||||
|
||||
include_directories(${PGNP_INCLUDE_DIR})
|
||||
|
||||
|
|
|
@ -4,8 +4,10 @@ PGNP is a Portable Game Notation (PGN) parser. More details about the
|
|||
PGN specification can be found [here](https://www.chessclub.com/help/PGN-spec).
|
||||
|
||||
# Features
|
||||
- Basic PGN parsing (tags, move, comments, variations etc.)
|
||||
- Basic PGN parsing (tags, move, comments, variations, NAG, etc.)
|
||||
- Merged PGN files parsing (several games in one file)
|
||||
- Handles very large files (several GB)
|
||||
- Very efficient
|
||||
|
||||
# How to use it ?
|
||||
PGNP can be used as a shared library in your project.
|
||||
|
|
|
@ -19,8 +19,8 @@ std::string HalfMove::NestedDump(HalfMove *m, int indent) {
|
|||
ss << " "
|
||||
<< " Move=" << m->move << " Count=" << m->count << " Comment=\""
|
||||
<< m->comment << "\""
|
||||
<< " IsBlack=" << m->isBlack << " Variations=" << m->variations.size()
|
||||
<< std::endl;
|
||||
<< " NAG=" << m->NAG << " IsBlack=" << m->isBlack
|
||||
<< " Variations=" << m->variations.size() << std::endl;
|
||||
|
||||
for (auto *var : m->variations) {
|
||||
ss << NestedDump(var, indent + 1);
|
||||
|
@ -65,15 +65,15 @@ void HalfMove::Copy(HalfMove *copy) {
|
|||
}
|
||||
|
||||
HalfMove *HalfMove::GetHalfMoveAt(int distance) {
|
||||
HalfMove *tmp=this;
|
||||
while(distance>0){
|
||||
if(tmp==NULL){
|
||||
HalfMove *tmp = this;
|
||||
while (distance > 0) {
|
||||
if (tmp == NULL) {
|
||||
throw HalfMoveOutOfRange();
|
||||
}
|
||||
distance--;
|
||||
tmp=tmp->MainLine;
|
||||
tmp = tmp->MainLine;
|
||||
}
|
||||
return(tmp);
|
||||
return (tmp);
|
||||
}
|
||||
|
||||
} // namespace pgnp
|
|
@ -21,6 +21,7 @@ public:
|
|||
std::string move;
|
||||
/// @brief Comment associated to the move
|
||||
std::string comment;
|
||||
std::string NAG;
|
||||
/// @brief Next HalfMove link to this line
|
||||
HalfMove *MainLine;
|
||||
/// @brief Next HalfMove links to variation of this line
|
||||
|
|
63
src/LargeFileStream.cpp
Normal file
63
src/LargeFileStream.cpp
Normal file
|
@ -0,0 +1,63 @@
|
|||
#include "LargeFileStream.hpp"
|
||||
|
||||
namespace pgnp {
|
||||
using namespace std;
|
||||
|
||||
LargeFileStream::LargeFileStream()
|
||||
: chuck_count(-1), last_read_size(0), last_loc(0), use_string(false),
|
||||
eof(false) {}
|
||||
|
||||
void LargeFileStream::FromFile(std::string filepath) {
|
||||
file.open(filepath);
|
||||
ReadNextChunk();
|
||||
}
|
||||
|
||||
/// @brief Emulate file access with an in-memory string.
/// @param content Text that operator[] will serve instead of file data.
void LargeFileStream::FromString(std::string content) {
  this->content.assign(content);
  // From now on operator[] takes the string shortcut instead of the file path.
  this->use_string = true;
}
|
||||
|
||||
/// @brief Load the next chunk of the file into the in-memory buffer.
///
/// Advances the chunk counter, asks the file for up to BUFFER_SIZE bytes and
/// records how many bytes were actually obtained.
void LargeFileStream::ReadNextChunk() {
  ++chuck_count;
  file.read(&buffer[0], BUFFER_SIZE);
  last_read_size = static_cast<long>(file.gcount());
}
|
||||
|
||||
/// @brief Array-like, forward-only access to the character at @a loc.
///
/// @param loc Absolute offset into the file (or into the string content when
///        the stream was set up with FromString()).
/// @return The character stored at @a loc, or '?' when @a loc lies at or past
///         the end of the data; in that case the EOF flag is raised.
/// @throws ReadToFar if the EOF flag was already raised by a previous access.
/// @throws BackwardRead if @a loc is smaller than the last requested offset.
char LargeFileStream::operator[](long loc) {
  // Perform various checks
  if (eof) {
    throw ReadToFar();
  }
  if (loc < last_loc) {
    throw BackwardRead();
  }
  last_loc = loc; // Keep track

  // Shortcut the operator for string content
  if (use_string) {
    // loc >= last_loc >= 0 at this point, so the unsigned conversion is safe.
    const std::size_t index = static_cast<std::size_t>(loc);
    if (index >= content.size()) {
      eof = true;
      return ('?');
    }
    // Serve the real character; previously '?' was returned unconditionally.
    return (content[index]);
  }

  // Go to the right memory chunk
  const long loc_chunk_count = loc / BUFFER_SIZE;
  while (chuck_count < loc_chunk_count) {
    ReadNextChunk();
  }
  const long offset = loc - (loc_chunk_count * BUFFER_SIZE);

  // The stream is in a failed state and the offset lies beyond the bytes
  // obtained by the last read: nothing more to serve.
  if (!file && offset >= last_read_size) {
    eof = true;
    return ('?');
  }

  // Return character
  return buffer[offset];
}
|
||||
|
||||
/// @brief Tell whether the EOF flag has been raised.
/// @param loc Not consulted; the answer is the flag maintained by operator[].
/// @return true once an access has hit the end of the data.
bool LargeFileStream::IsEOF(long loc) {
  static_cast<void>(loc);
  return eof;
}
|
||||
|
||||
} // namespace pgnp
|
52
src/LargeFileStream.hpp
Normal file
52
src/LargeFileStream.hpp
Normal file
|
@ -0,0 +1,52 @@
|
|||
#ifndef PGNP_LARGEFILESTREAM_HPP
#define PGNP_LARGEFILESTREAM_HPP

/// Size, in bytes, of the in-memory chunk buffer (512 KiB).
#define BUFFER_SIZE (1024 * 1024 / 2)

#include <exception>
#include <fstream>
#include <iostream>
#include <string>

namespace pgnp {
using namespace std;

/// @brief Forward-only, array-like reader over a file loaded chunk by chunk
/// (or over an in-memory string).
class LargeFileStream {
  /// @brief File to load
  std::ifstream file;
  /// @brief In memory buffer
  char buffer[BUFFER_SIZE];
  /// @brief Number of chunks read minus 1
  long chuck_count;
  /// @brief Number of bytes read during the last file access
  long last_read_size;
  /// @brief Keep track of the file offset (to prevent backward read)
  long last_loc;
  /// @brief Use a string as file content
  std::string content;
  /// @brief Used to shortcut some methods when reading from a string
  bool use_string;
  /// @brief End Of File ?
  bool eof;

  /// @brief Load the next chunk of data from disk to memory
  void ReadNextChunk();

public:
  LargeFileStream();
  /// @brief Read the content from the file located at @a filepath
  void FromFile(std::string filepath);
  /// @brief Emulate file access with a string
  void FromString(std::string content);
  /// @brief Allow array like access to the file
  char operator[](long loc);
  /// @brief Check if we reach the EOF
  bool IsEOF(long loc);

  // Various Exceptions
  /// @brief Raised when a read targets an offset before the previous one
  struct BackwardRead : public std::exception {
    const char *what() const noexcept override {
      return "LargeFileStream cannot read backward";
    }
  };
  /// @brief Raised when a read is attempted after EOF has been reached
  struct ReadToFar : public std::exception {
    const char *what() const noexcept override {
      return "You reach the end of the file";
    }
  };
};

} // namespace pgnp

#endif // PGNP_LARGEFILESTREAM_HPP
|
116
src/PGN.cpp
116
src/PGN.cpp
|
@ -7,7 +7,7 @@
|
|||
#define IS_DIGIT(c) \
|
||||
(c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || \
|
||||
c == '6' || c == '7' || c == '8' || c == '9')
|
||||
#define IS_EOF(loc) ((loc) >= pgn_content.size())
|
||||
#define IS_EOF(loc) (pgn_content.IsEOF(loc))
|
||||
#define EOF_CHECK(loc) \
|
||||
{ \
|
||||
if (IS_EOF(loc)) \
|
||||
|
@ -25,17 +25,10 @@ PGN::~PGN() {
|
|||
|
||||
std::string PGN::GetResult() { return (result); }
|
||||
|
||||
void PGN::FromFile(std::string filepath) {
|
||||
std::ifstream inFile;
|
||||
inFile.open(filepath);
|
||||
std::stringstream strStream;
|
||||
strStream << inFile.rdbuf();
|
||||
|
||||
this->pgn_content = strStream.str();
|
||||
}
|
||||
void PGN::FromFile(std::string filepath) { pgn_content.FromFile(filepath); }
|
||||
|
||||
void PGN::FromString(std::string pgn_content) {
|
||||
this->pgn_content = pgn_content;
|
||||
this->pgn_content.FromString(pgn_content);
|
||||
}
|
||||
|
||||
void PGN::ParseNextGame() {
|
||||
|
@ -46,20 +39,26 @@ void PGN::ParseNextGame() {
|
|||
result = "";
|
||||
tagkeys.clear();
|
||||
tags.clear();
|
||||
|
||||
moves = new HalfMove();
|
||||
|
||||
// Search for new game
|
||||
if (IS_EOF(LastGameEndLoc)) {
|
||||
throw NoGameFound();
|
||||
}
|
||||
int loc = NextNonBlank(LastGameEndLoc);
|
||||
|
||||
if (IS_EOF(loc)) {
|
||||
throw NoGameFound();
|
||||
}
|
||||
|
||||
// Parse game
|
||||
while (!IS_EOF(loc)) {
|
||||
char c = pgn_content[loc];
|
||||
if (!IS_BLANK(c)) {
|
||||
if (c == '[') {
|
||||
loc = ParseNextTag(loc);
|
||||
} else if (IS_DIGIT(c)) {
|
||||
loc = ParseHalfMove(loc, moves);
|
||||
LastGameEndLoc = loc + 1; // Next game start 1 char after the last one
|
||||
LastGameEndLoc = ParseHalfMove(loc, moves);
|
||||
break;
|
||||
} else if (c == '{') {
|
||||
loc = ParseComment(loc, moves);
|
||||
|
@ -68,6 +67,7 @@ void PGN::ParseNextGame() {
|
|||
}
|
||||
loc++;
|
||||
}
|
||||
|
||||
if (result.size() <= 0) {
|
||||
throw InvalidGameResult();
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ bool PGN::HasTag(std::string key) {
|
|||
return (std::find(tags.begin(), tags.end(), key) != tags.end());
|
||||
}
|
||||
|
||||
int PGN::ParseComment(int loc, HalfMove *hm) {
|
||||
long PGN::ParseComment(long loc, HalfMove *hm) {
|
||||
// Goto next char
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
|
@ -123,42 +123,39 @@ int PGN::ParseComment(int loc, HalfMove *hm) {
|
|||
return (loc);
|
||||
}
|
||||
|
||||
int PGN::ParseHalfMove(int loc, HalfMove *hm) {
|
||||
long PGN::ParseHalfMove(long loc, HalfMove *hm) {
|
||||
// Goto next char
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
char c = pgn_content[loc];
|
||||
|
||||
// Check if we reach score entry (* or 1-0 or 0-1 or 1/2-1/2)
|
||||
if (!IS_EOF(loc + 1)) {
|
||||
char nc = pgn_content[loc + 1]; // Next c
|
||||
if ((IS_DIGIT(c) && nc == '-') or (IS_DIGIT(c) && nc == '/') or c == '*') {
|
||||
if (c == '*') {
|
||||
result = "*";
|
||||
} else if (nc == '-') {
|
||||
if (c == '1') {
|
||||
result = "1-0";
|
||||
loc += 2;
|
||||
} else {
|
||||
result = "0-1";
|
||||
loc += 2;
|
||||
}
|
||||
} else {
|
||||
result = "1/2-1/2";
|
||||
loc += 6;
|
||||
}
|
||||
return (loc);
|
||||
}
|
||||
if (c == '*') {
|
||||
result = "*";
|
||||
return (loc + 1);
|
||||
}
|
||||
|
||||
// Parse (move number
|
||||
// Parse move number and check if end of game
|
||||
if (IS_DIGIT(c)) {
|
||||
std::string move_nb;
|
||||
char first_digit = c;
|
||||
while (IS_DIGIT(c)) {
|
||||
move_nb += c;
|
||||
loc++;
|
||||
c = pgn_content[loc];
|
||||
EOF_CHECK(loc);
|
||||
if (c == '/' || c == '-') {
|
||||
if (c == '/') {
|
||||
result = "1/2-1/2";
|
||||
return (loc + 6);
|
||||
} else if (first_digit == '1') {
|
||||
result = "1-0";
|
||||
return (loc + 2);
|
||||
} else {
|
||||
result = "0-1";
|
||||
return (loc + 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
hm->count = std::stoi(move_nb);
|
||||
loc++;
|
||||
|
@ -172,10 +169,6 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
|
|||
hm->isBlack = true;
|
||||
}
|
||||
|
||||
// Parse comment entries (various comment could appear during HalfMove
|
||||
// parsing)
|
||||
loc = ParseComment(loc, hm);
|
||||
|
||||
// Parse the HalfMove
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
|
@ -189,13 +182,21 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
|
|||
}
|
||||
hm->move = move;
|
||||
|
||||
// Parse comment
|
||||
loc = ParseComment(loc, hm);
|
||||
|
||||
// Skip end of variation
|
||||
if (c == ')') {
|
||||
// Check for NAG
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
c = pgn_content[loc];
|
||||
if (c == '$') {
|
||||
hm->NAG += c;
|
||||
loc++;
|
||||
return (loc);
|
||||
EOF_CHECK(loc);
|
||||
c = pgn_content[loc];
|
||||
while (IS_DIGIT(c)) {
|
||||
hm->NAG += c;
|
||||
loc++;
|
||||
EOF_CHECK(loc);
|
||||
c = pgn_content[loc];
|
||||
}
|
||||
}
|
||||
|
||||
// Parse comment
|
||||
|
@ -209,10 +210,19 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
|
|||
loc = ParseHalfMove(loc, var);
|
||||
hm->variations.push_back(var);
|
||||
loc++; // Skip ')'
|
||||
// Goto next var
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
c = pgn_content[loc];
|
||||
}
|
||||
|
||||
// Parse comment
|
||||
loc = ParseComment(loc, hm);
|
||||
// Skip end of variation
|
||||
loc = NextNonBlank(loc);
|
||||
EOF_CHECK(loc);
|
||||
c = pgn_content[loc];
|
||||
if (c == ')') {
|
||||
return (loc);
|
||||
}
|
||||
|
||||
// Parse next HalfMove
|
||||
loc = NextNonBlank(loc);
|
||||
|
@ -231,10 +241,10 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
|
|||
return (loc);
|
||||
}
|
||||
|
||||
int PGN::ParseNextTag(int start_loc) {
|
||||
long PGN::ParseNextTag(long start_loc) {
|
||||
// Parse key
|
||||
std::string key;
|
||||
int keyloc = start_loc + 1;
|
||||
long keyloc = start_loc + 1;
|
||||
EOF_CHECK(keyloc);
|
||||
char c = pgn_content[keyloc];
|
||||
while (!IS_BLANK(c)) {
|
||||
|
@ -246,7 +256,7 @@ int PGN::ParseNextTag(int start_loc) {
|
|||
|
||||
// Parse value
|
||||
std::string value;
|
||||
int valueloc = NextNonBlank(keyloc) + 1;
|
||||
long valueloc = NextNonBlank(keyloc) + 1;
|
||||
EOF_CHECK(keyloc);
|
||||
c = pgn_content[valueloc];
|
||||
while (c != '"' or IS_EOF(valueloc)) {
|
||||
|
@ -294,14 +304,16 @@ std::string PGN::Dump() {
|
|||
return (ss.str());
|
||||
}
|
||||
|
||||
int PGN::NextNonBlank(int loc) {
|
||||
long PGN::NextNonBlank(long loc) {
|
||||
char c = pgn_content[loc];
|
||||
while (IS_BLANK(c)) {
|
||||
loc++;
|
||||
if (IS_EOF(loc))
|
||||
if (IS_EOF(loc)) {
|
||||
return (loc);
|
||||
}
|
||||
c = pgn_content[loc];
|
||||
}
|
||||
|
||||
return (loc);
|
||||
}
|
||||
|
||||
|
|
23
src/PGN.hpp
23
src/PGN.hpp
|
@ -1,4 +1,5 @@
|
|||
#include "HalfMove.hpp"
|
||||
#include "LargeFileStream.hpp"
|
||||
#include <algorithm>
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
|
@ -17,9 +18,10 @@ private:
|
|||
/// @brief Contains the parsed PGN moves
|
||||
HalfMove *moves;
|
||||
/// @brief Contains the PGN data
|
||||
std::string pgn_content;
|
||||
/// @brief Contains the location of the end of the last parsed game (1 PGN file may have multiple games)
|
||||
int LastGameEndLoc;
|
||||
LargeFileStream pgn_content;
|
||||
/// @brief Contains the location of the end of the last parsed game (1 PGN
|
||||
/// file may have multiple games)
|
||||
long LastGameEndLoc;
|
||||
|
||||
public:
|
||||
PGN();
|
||||
|
@ -27,8 +29,9 @@ public:
|
|||
void FromFile(std::string);
|
||||
void FromString(std::string);
|
||||
/**
|
||||
* Parse the next available game. Note that it raises a @a NoGameFound exception if no more game is available.
|
||||
* A call to this method flush all the last parsed game data. Be careful.
|
||||
* Parse the next available game. Note that it raises a @a NoGameFound
|
||||
* exception if no more game is available. A call to this method flush all the
|
||||
* last parsed game data. Be careful.
|
||||
*/
|
||||
void ParseNextGame();
|
||||
/// @brief Check if PGN contains a specific tag
|
||||
|
@ -49,13 +52,13 @@ public:
|
|||
private:
|
||||
/// @brief Populate @a tags with by parsing the one starting at location in
|
||||
/// argument
|
||||
int ParseNextTag(int);
|
||||
long ParseNextTag(long);
|
||||
/// @brief Get the next non-blank char location starting from location in
|
||||
/// argument
|
||||
int NextNonBlank(int);
|
||||
long NextNonBlank(long);
|
||||
/// @brief Parse a HalfMove at a specific location into @a pgn_content
|
||||
int ParseHalfMove(int, HalfMove *);
|
||||
int ParseComment(int,HalfMove *);
|
||||
long ParseHalfMove(long, HalfMove *);
|
||||
long ParseComment(long, HalfMove *);
|
||||
};
|
||||
|
||||
struct UnexpectedEOF : public std::exception {
|
||||
|
@ -76,7 +79,7 @@ struct NoGameFound : public std::exception {
|
|||
|
||||
struct UnexpectedCharacter : public std::exception {
|
||||
std::string msg;
|
||||
UnexpectedCharacter(char actual, char required, int loc) {
|
||||
UnexpectedCharacter(char actual, char required, long loc) {
|
||||
std::stringstream ss;
|
||||
ss << "Expected \'" << required << "\' at location " << loc
|
||||
<< " but read \'" << actual << "\'";
|
||||
|
|
Loading…
Add table
Reference in a new issue