Skip to content

feat: support importing R data files into JASP #5841

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ jobs:
sudo apt install libxcb-xkb-dev libxcb-xkb1 libxcb-xinerama0 libxkbcommon-dev libxkbcommon-x11-dev autoconf zlib1g zlib1g-dev cmake
sudo apt install gfortran build-essential flex libssl-dev libgl1-mesa-dev libsqlite3-dev
sudo apt install libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev #required by some r packages
sudo apt install libglpk-dev libcurl4-openssl-dev libmpfr-dev libfontconfig1-dev libcairo2-dev #required by some r packages
sudo apt install libglpk-dev libcurl4-openssl-dev libmpfr-dev libfontconfig1-dev libcairo2-dev netcdf-bin #required by some r packages
sudo apt install jags
sudo apt install libminizip-dev # required by freexl
sudo apt install librdata-dev
git clone https://github.com/jasp-stats/freexl.git
cd freexl && ./configure && make && sudo make install

Expand Down
2 changes: 1 addition & 1 deletion Common/utilenums.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define UTILENUMS_H
#include "enumutilities.h"

DECLARE_ENUM(FileTypeBase, jasp = 0, html, csv, txt, tsv, sav, zsav, ods, xls, xlsx, pdf, sas7bdat, sas7bcat, por, xpt, dta, database, empty, unknown );
DECLARE_ENUM(FileTypeBase, jasp = 0, html, csv, txt, tsv, sav, zsav, ods, xls, xlsx, pdf, sas7bdat, sas7bcat, por, xpt, dta, database, rdata, rds, empty, unknown );

//const QStringList Database::dbTypes() const should be updated if DbType is changed.
DECLARE_ENUM(DbType, NOTCHOSEN, QDB2, /*QIBASE,*/ QMYSQL, QOCI, QODBC, QPSQL, QSQLITE /*, QSQLITE2, QTDS*/ );
Expand Down
6 changes: 5 additions & 1 deletion Desktop/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ target_include_directories(
${PROJECT_SOURCE_DIR}/CommonData
${PROJECT_SOURCE_DIR}/QMLComponents
# ReadStat
$<$<PLATFORM_ID:Windows>:${RTOOLS_LIBREADSTAT_H}>
${LIBREADSTAT_INCLUDE_DIRS}
# librdata
${LIBRDATA_INCLUDE_DIRS}
# JSONCPP
$<$<PLATFORM_ID:Linux>:${_PKGCONFIG_LIB_JSONCPP_INCLUDEDIR}>
$<$<BOOL:${FLATPAK_USED}>:/app/include/QtCore5Compat>
Expand Down Expand Up @@ -158,6 +159,9 @@ target_link_libraries(
${LIBREADSTAT_LIBRARIES}
# MinGW's ReadStat
$<$<PLATFORM_ID:Windows>:${RTOOLS_LIBREADSTAT_DLL_A}>
# librdata -----------------------------------
${LIBRDATA_LIBRARIES}
$<$<PLATFORM_ID:Windows>:${RTOOLS_LIBRDATA_DLL_A}>
# JSONCPP
#$<$<PLATFORM_ID:Linux>:${_PKGCONFIG_LIB_JSONCPP_LIBRARIES}>
#$<$<PLATFORM_ID:Linux>:${_PKGCONFIG_LIB_JSONCPP_LINK_LIBRARIES}>
Expand Down
3 changes: 3 additions & 0 deletions Desktop/data/datasetloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "importers/odsimporter.h"
#include "importers/readstatimporter.h"
#include "importers/excelimporter.h"
#include "importers/rdataimporter.h"


#include <QFileInfo>
Expand Down Expand Up @@ -56,6 +57,8 @@ Importer* DataSetLoader::getImporter(const string & locator, const string &ext)
if( boost::iequals(ext,".xls") ||
boost::iequals(ext,".xlsx")) return new ExcelImporter();
if( ReadStatImporter::extSupported(ext)) return new ReadStatImporter(ext);
if( boost::iequals(ext,".rdata") ||
boost::iequals(ext,".rds")) return new RDataImporter(ext);

return nullptr; //If NULL then JASP will try to load it as a .jasp file (if the extension matches)
}
Expand Down
4 changes: 3 additions & 1 deletion Desktop/data/fileevent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ const std::string FileEvent::databaseStr() const

QString FileEvent::getProgressMsg() const
{
//jasp = 0, html, csv, txt, tsv, sav, zsav, ods, pdf, sas7bdat, sas7bcat, por, xpt, empty, unknown
//jasp = 0, html, csv, txt, tsv, sav, zsav, ods, xls, xlsx, pdf, sas7bdat, sas7bcat, por, xpt, dta, database, rdata, rds, empty, unknown
switch(_operation)
{
case FileEvent::FileOpen:
Expand All @@ -139,6 +139,8 @@ QString FileEvent::getProgressMsg() const
case Utils::FileType::sas7bcat: return tr("Importing SAS File");
case Utils::FileType::dta: return tr("Importing STATA File");
case Utils::FileType::jasp: return tr("Loading JASP File");
case Utils::FileType::rdata:
case Utils::FileType::rds: return tr("Loading R Data File");
default: return tr("Loading File");
}
break;
Expand Down
48 changes: 48 additions & 0 deletions Desktop/data/importers/rdata/rdataimportcolumn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//
// Copyright (C) 2013-2025 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#include "rdataimportcolumn.h"
#include "timers.h"

/// Creates an empty column called `name` that belongs to `importDataSet`.
/// No storage is reserved up front; values are appended later via addValue().
RDataImportColumn::RDataImportColumn(ImportDataSet* importDataSet, std::string name)
    : ImportColumn(importDataSet, name)
{}

/// Creates an empty column called `name` that belongs to `importDataSet` and
/// pre-allocates room for `reserve` values.
///
/// @param importDataSet  data set this column is attached to (forwarded to ImportColumn).
/// @param name           column name (forwarded to ImportColumn).
/// @param reserve        expected number of values; zero or negative means "no hint".
RDataImportColumn::RDataImportColumn(ImportDataSet *importDataSet, std::string name, long reserve)
    : ImportColumn(importDataSet, name)
{
    // A negative long would wrap to an enormous size_t and make reserve() throw
    // std::length_error, so only positive hints are honoured.
    if (reserve > 0)
        _data.reserve(static_cast<size_t>(reserve));
}

/// Destructor. The only explicit statement is the JASPTIMER_SCOPE
/// instrumentation; the stored values are released implicitly with _data.
RDataImportColumn::~RDataImportColumn()
{
    JASPTIMER_SCOPE(RDataImportColumn::~RDataImportColumn());
}

/// @return the number of values currently stored in this column.
size_t RDataImportColumn::size() const
{
    const size_t valueCount = _data.size();
    return valueCount;
}

/// Appends a copy of `value` as the next row of this column.
void RDataImportColumn::addValue(const std::string &value)
{
    _data.emplace_back(value);
}

const std::vector<std::string> &RDataImportColumn::getValues() const
{
return _data;
}
39 changes: 39 additions & 0 deletions Desktop/data/importers/rdata/rdataimportcolumn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//
// Copyright (C) 2013-2025 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#ifndef RDATAIMPORTCOLUMN_H
#define RDATAIMPORTCOLUMN_H

#include "data/importers/importcolumn.h"

/// Import column that keeps every cell of an R data column as a string.
class RDataImportColumn : public ImportColumn
{
public:
    /// Creates an empty column named `name` attached to `importDataSet`.
    RDataImportColumn(ImportDataSet *importDataSet, std::string name);

    /// Same as above, additionally passing `reserve` as a capacity hint for the value storage.
    RDataImportColumn(ImportDataSet *importDataSet, std::string name, long reserve);

    ~RDataImportColumn() override;

    /// Number of values stored so far.
    size_t size() const override;

    /// All stored values, as strings, in insertion order.
    const stringvec &allValuesAsStrings() const override { return _data; }

    /// Appends `value` as the next row.
    void addValue(const std::string &value);

    /// Read-only access to the stored values (same storage as allValuesAsStrings()).
    const stringvec &getValues() const;

private:
    stringvec _data; ///< One string per row.
};

#endif // RDATAIMPORTCOLUMN_H
167 changes: 167 additions & 0 deletions Desktop/data/importers/rdata/readrdata.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//
// Copyright (C) 2013-2025 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#include "readrdata.h"

#include <cmath>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <utility>

#include "log.h"

/// Remembers `locator` as the path of the file to parse; nothing is read until open().
RDataReader::RDataReader(const std::string &locator)
{
    this->_filePath = locator;
}

void RDataReader::open()
{
if (_filePath.empty())
throw std::runtime_error("File path cannot be empty.");

_parser = rdata_parser_init();

rdata_set_table_handler(_parser, &_tableHandler);
rdata_set_column_handler(_parser, &_columnHandler);
rdata_set_text_value_handler(_parser, &_textValueHandler);
rdata_set_column_name_handler(_parser, &_columnNameHandler);
rdata_set_error_handler(_parser, &_errorHandler);
// rdata_set_value_label_handler(_parser, &_valueLabelHandler);

rdata_error_t result = rdata_parse(_parser, _filePath.c_str(), &_context);

_rowCount = _context.row_count;
_colCount = _context.column_count;
_columnNames = _context.column_names;
_column_data = _context.column_data;

if (result != RDATA_OK)
throw std::runtime_error("Failed to parse file");

if (_parser)
rdata_parser_free(_parser);
}

/// librdata table callback: records the table name in the parse context and logs it.
///
/// @param name  table name as reported by the parser; may be null, in which
///              case "Default_table" is recorded instead.
/// @param ctx   the RDataCtx passed to rdata_parse().
/// @return 0 always.
int RDataReader::_tableHandler(const char *name, void *ctx)
{
    RDataCtx *context = static_cast<RDataCtx *>(ctx);

    context->table_name = name ? name : "Default_table";

    // Log the stored name rather than `name`: streaming a null const char* is
    // undefined behaviour.
    Log::log() << "Table Name: " << context->table_name << std::endl;

    return 0;
}

int RDataReader::_columnHandler(const char *name, rdata_type_t type, void *data, long count, void *ctx)
{

RDataCtx *context = static_cast<RDataCtx *>(ctx);
// Log::log() << "Column name: " << name << ", Type: " << type << ", Count: " << count << std::endl;

context->column_count++;

if (count > context->row_count)
{
context->row_count = count;
}

stringvec column_values;

if (type == RDATA_TYPE_STRING)
{
// Initialize an empty vector for string type to be filled in _textValueHandler
context->column_data.emplace_back(stringvec(count, ""));
}
else
{
switch (type)
{
case RDATA_TYPE_INT32:
case RDATA_TYPE_LOGICAL:
{
int32_t *values = static_cast<int32_t *>(data);
for (size_t i = 0; i < count; i++)
column_values.push_back(std::to_string(values[i]));
break;
}
case RDATA_TYPE_REAL:
{
double *values = static_cast<double *>(data);
for (size_t i = 0; i < count; i++)
{
if (std::isnan(values[i]))
column_values.push_back("NA");
else
column_values.push_back(std::to_string(values[i]));
}
break;
}
default:
Log::log() << "Unsupported data type for column: " << name << std::endl;
break;
}

context->column_data.push_back(column_values);
}

return 0;
}

int RDataReader::_columnNameHandler(const char *value, int index, void *ctx)
{
RDataCtx *context = static_cast<RDataCtx *>(ctx);

if (index >= context->column_names.size())
context->column_names.resize(index + 1);

context->column_names[index] = value ? std::string(value) : "Column_" + std::to_string(index + 1);
//Log::log() << "Column name : " << value << " [Index " << index << "]: " << context->column_names[index] << std::endl;

return 0;
}

void RDataReader::_errorHandler(const char *error_message, void *ctx)
{
Log::log() << "Error: " << error_message << std::endl;
}

int RDataReader::_textValueHandler(const char *value, int index, void *ctx)
{
// This handled if data type in _columnHandler is "RDATA_TYPE_STRING", because it's empty!
RDataCtx *context = static_cast<RDataCtx *>(ctx);

if (context->column_data.empty())
{
Log::log() << "Error: _textValueHandler called before _columnHandler." << std::endl;
return 1; // Abort processing
}

size_t column_index = context->column_data.size() - 1;
if (index >= context->column_data[column_index].size())
{
context->column_data[column_index].resize(index + 1, "");
}

context->column_data[column_index][index] = value ? value : "NA";

return 0;
}

/// Placeholder value-label callback. It currently ignores all of its arguments
/// and reports success; open() does not register it with the parser.
int RDataReader::_valueLabelHandler(const char *value, int index, void *ctx)
{
    // TODO: implement importing factor level as label
    const int success = 0;
    return success;
}
63 changes: 63 additions & 0 deletions Desktop/data/importers/rdata/readrdata.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//
// Copyright (C) 2013-2025 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#ifndef RDATA_H
#define RDATA_H

#include <string>
#include "rdata.h"
#include "utils.h"

/// Reads an R data file through librdata and exposes its contents as strings.
///
/// Usage: construct with a file path, call open(), then query the getters.
/// The getters return whatever the last open() collected; before open() they
/// report zero rows/columns and empty containers.
class RDataReader
{
public:
    /// Stores `locator` as the path to parse; no I/O happens here.
    RDataReader(const std::string &locator);

    /// Parses the file; throws std::runtime_error on failure.
    void open();

    size_t getRowCount() const { return _rowCount; }
    size_t getColCount() const { return _colCount; }

    // Returned by const reference so callers that only read do not copy the
    // whole data set; assigning the result to a value still makes a copy.
    const stringvec &getColumnNames() const { return _columnNames; }
    const stringvecvec &getColData() const { return _column_data; }

private:
    /// Accumulator handed to the librdata callbacks as their user context.
    struct RDataCtx
    {
        size_t column_count = 0;
        size_t row_count = 0;
        // Owned copy: the pointer passed to the table callback is not
        // guaranteed here to outlive the callback.
        std::string table_name;
        stringvec column_names;
        stringvecvec column_data;
    };

    static int _tableHandler(const char *name, void *ctx);
    static int _columnHandler(const char *name, rdata_type_t type, void *data, long count, void *ctx);
    static int _columnNameHandler(const char *value, int index, void *ctx);
    static int _textValueHandler(const char *value, int index, void *ctx);
    static int _valueLabelHandler(const char *value, int index, void *ctx);
    static void _errorHandler(const char *error_message, void *ctx);

    std::string _filePath;
    rdata_parser_t *_parser = nullptr;

    RDataCtx _context;
    stringvec _columnNames;
    // Zero-initialised so the getters are well defined before open() runs.
    size_t _rowCount = 0;
    size_t _colCount = 0;
    stringvecvec _column_data;
};

#endif // RDATA_H
Loading
Loading