Improve parsing data type

This commit is contained in:
Loic Guegan 2022-01-26 20:50:24 +01:00
parent 0fa6b682f5
commit f144f14d9c
8 changed files with 45 additions and 28 deletions

View file

@ -11,6 +11,7 @@ file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR})
configure_file(src/PGN.hpp ${PGNP_INCLUDE_DIR}/pgnp.hpp COPYONLY)
configure_file(src/HalfMove.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
configure_file(src/LargeFileStream.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
configure_file(src/Types.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
include_directories(${PGNP_INCLUDE_DIR})

View file

@ -9,8 +9,8 @@ PGN specification can be found [here](https://www.chessclub.com/help/PGN-spec).
# Features
- Basic PGN parsing (tags, move, comments, variations, NAG, etc.)
- Merged PGN files parsing (several games in one file)
- Handle very large files (several GB)
- Very efficient
- Handle very large files (max size is 2^(8*sizeof(unsigned long long)) bytes)
- Efficiency
# How to use it ?
PGNP can be used as a shared library in your project.

View file

@ -1,3 +1,6 @@
#pragma once
#include "Types.hpp"
#include <sstream>
#include <string>
#include <vector>
@ -35,11 +38,13 @@ public:
std::string Dump();
/// @brief Perform a deep copy of a HalfMove
void Copy(HalfMove *copy);
/// @brief Get HalfMove located x down the MainLine
HalfMove* GetHalfMoveAt(int);
/// @brief Get HalfMove located x down the MainLine
HalfMove *GetHalfMoveAt(int);
};
/// @brief Exception reporting that a HalfMove distance is out of range
struct HalfMoveOutOfRange : public std::exception {
  /// @return A static, human-readable description of the error
  const char *what() const noexcept override {
    return "HalfMove distance is out of range";
  }
};
} // namespace pgnp

View file

@ -23,7 +23,7 @@ void LargeFileStream::ReadNextChunk() {
last_read_size = file.gcount();
}
char LargeFileStream::operator[](long loc) {
char LargeFileStream::operator[](ull loc) {
// Perform various checks
if (eof) {
throw ReadToFar();
@ -42,11 +42,11 @@ char LargeFileStream::operator[](long loc) {
}
// Go to the right memory chunk
long loc_chunk_count = loc / BUFFER_SIZE;
ull loc_chunk_count = loc / BUFFER_SIZE;
while (chuck_count < loc_chunk_count) {
ReadNextChunk();
}
long offset = loc - (loc_chunk_count * BUFFER_SIZE);
ull offset = loc - (loc_chunk_count * BUFFER_SIZE);
// Check for EOF
if (!file && offset >= last_read_size) {

View file

@ -1,9 +1,12 @@
#define BUFFER_SIZE (1024 * 1024 / 2)
#pragma once
#include "Types.hpp"
#include <fstream>
#include <iostream>
#include <string>
#define BUFFER_SIZE (1024 * 1024 / 2)
namespace pgnp {
using namespace std;
@ -13,11 +16,11 @@ class LargeFileStream {
/// @brief In memory buffer
char buffer[BUFFER_SIZE];
/// @brief Number of chunks read minus 1
long chuck_count;
ull chuck_count;
/// @brief Number of bytes read during the last file access
long last_read_size;
ull last_read_size;
/// @brief Keep track of the file offset (to prevent backward read)
long last_loc;
ull last_loc;
/// @brief Use a string as file content
std::string content;
/// @brief Used to shortcut some methods
@ -34,7 +37,7 @@ public:
/// @brief Emulate file access with a string
void FromString(std::string content);
/// @brief Allow array like access to the file
char operator[](long loc);
char operator[](ull loc);
/// @brief Check if we reach the EOF
bool IsEOF();

View file

@ -45,7 +45,7 @@ void PGN::ParseNextGame() {
if (IS_EOF) {
throw NoGameFound();
}
long loc = GotoNextToken(LastGameEndLoc);
ull loc = GotoNextToken(LastGameEndLoc);
if (IS_EOF) {
throw NoGameFound();
}
@ -103,7 +103,7 @@ bool PGN::HasTag(std::string key) {
return (std::find(tags.begin(), tags.end(), key) != tags.end());
}
long PGN::ParseComment(long loc, HalfMove *hm) {
ull PGN::ParseComment(ull loc, HalfMove *hm) {
// Goto next char
loc = GotoNextToken(loc);
EOF_CHECK(loc);
@ -131,7 +131,7 @@ long PGN::ParseComment(long loc, HalfMove *hm) {
return (loc);
}
long PGN::ParseHalfMove(long loc, HalfMove *hm) {
ull PGN::ParseHalfMove(ull loc, HalfMove *hm) {
// Goto next char
loc = GotoNextToken(loc);
EOF_CHECK(loc);
@ -249,10 +249,10 @@ long PGN::ParseHalfMove(long loc, HalfMove *hm) {
return (loc);
}
long PGN::ParseNextTag(long start_loc) {
ull PGN::ParseNextTag(ull start_loc) {
// Parse key
std::string key;
long keyloc = start_loc + 1;
ull keyloc = start_loc + 1;
EOF_CHECK(keyloc);
char c = pgn_content[keyloc];
while (!IS_BLANK(c)) {
@ -264,7 +264,7 @@ long PGN::ParseNextTag(long start_loc) {
// Parse value
std::string value;
long valueloc = GotoNextToken(keyloc) + 1;
ull valueloc = GotoNextToken(keyloc) + 1;
EOF_CHECK(keyloc);
c = pgn_content[valueloc];
while (c != '"' or IS_EOF) {
@ -312,7 +312,7 @@ std::string PGN::Dump() {
return (ss.str());
}
long PGN::GotoNextToken(long loc) {
ull PGN::GotoNextToken(ull loc) {
char c = pgn_content[loc];
while (IS_BLANK(c)) {
loc++;
@ -331,7 +331,7 @@ long PGN::GotoNextToken(long loc) {
return (loc);
}
long PGN::GotoEOL(long loc) {
ull PGN::GotoEOL(ull loc) {
char c = pgn_content[loc];
while (true) {
loc++;

View file

@ -1,5 +1,8 @@
#pragma once
#include "HalfMove.hpp"
#include "LargeFileStream.hpp"
#include "Types.hpp"
#include <algorithm>
#include <exception>
#include <fstream>
@ -21,7 +24,7 @@ private:
LargeFileStream pgn_content;
/// @brief Contains the location of the end of the last parsed game (1 PGN
/// file may have multiple games)
long LastGameEndLoc;
ull LastGameEndLoc;
public:
PGN();
@ -52,16 +55,16 @@ public:
private:
/// @brief Populate @a tags by parsing the tag starting at the location given
/// in argument
long ParseNextTag(long);
ull ParseNextTag(ull);
/// @brief Parse a HalfMove at a specific location into @a pgn_content
long ParseHalfMove(long, HalfMove *);
ull ParseHalfMove(ull, HalfMove *);
/// @brief Parse a consecutive sequence of comment
long ParseComment(long, HalfMove *);
ull ParseComment(ull, HalfMove *);
/// @brief Get the next non-blank char location ignoring line comments ('%'
/// and ';')
long GotoNextToken(long);
ull GotoNextToken(ull);
/// @brief Goto the end of the current line
long GotoEOL(long);
ull GotoEOL(ull);
};
struct UnexpectedEOF : public std::exception {
@ -82,7 +85,7 @@ struct NoGameFound : public std::exception {
struct UnexpectedCharacter : public std::exception {
std::string msg;
UnexpectedCharacter(char actual, char required, long loc) {
UnexpectedCharacter(char actual, char required, ull loc) {
std::stringstream ss;
ss << "Expected \'" << required << "\' at location " << loc
<< " but read \'" << actual << "\'";

5
src/Types.hpp Normal file
View file

@ -0,0 +1,5 @@
#pragma once
namespace pgnp {
/// @brief Shorthand alias for the unsigned long long integer type
using ull = unsigned long long;
} // namespace pgnp