/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* */
/* This file is part of the program and library */
/* PaPILO --- Parallel Presolve for Integer and Linear Optimization */
/* */
/* Copyright (C) 2020-2022 Konrad-Zuse-Zentrum */
/* fuer Informationstechnik Berlin */
/* */
/* This program is free software: you can redistribute it and/or modify */
/* it under the terms of the GNU Lesser General Public License as published */
/* by the Free Software Foundation, either version 3 of the License, or */
/* (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU Lesser General Public License for more details. */
/* */
/* You should have received a copy of the GNU Lesser General Public License */
/* along with this program. If not, see . */
/* */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifndef _PAPILO_IO_MPS_PARSER_HPP_
#define _PAPILO_IO_MPS_PARSER_HPP_
#include "papilo/Config.hpp"
#include "papilo/core/ConstraintMatrix.hpp"
#include "papilo/core/Objective.hpp"
#include "papilo/core/Problem.hpp"
#include "papilo/core/VariableDomains.hpp"
#include "papilo/misc/Flags.hpp"
#include "papilo/misc/Hash.hpp"
#include "papilo/misc/Num.hpp"
#include "papilo/external/pdqsort/pdqsort.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef PAPILO_USE_BOOST_IOSTREAMS_WITH_BZIP2
#include
#endif
#ifdef PAPILO_USE_BOOST_IOSTREAMS_WITH_ZLIB
#include
#endif
namespace papilo
{
template ::is_floating_point>
struct RealParseType
{
using type = double;
};
template
struct RealParseType
{
using type = REAL;
};
/// Parser for mps files in fixed and free format
template
class MpsParser
{
static_assert(
num_traits::type>::is_floating_point,
"the parse type must be a floating point type" );
public:
static boost::optional>
loadProblem( const std::string& filename )
{
MpsParser parser;
Problem problem;
if( !parser.parseFile( filename ) )
return boost::none;
assert( parser.nnz >= 0 );
Vec obj_vec( size_t( parser.nCols ), REAL{ 0.0 } );
for( auto i : parser.coeffobj )
obj_vec[i.first] = i.second;
problem.setObjective( std::move( obj_vec ), parser.objoffset );
problem.setConstraintMatrix(
SparseStorage{ std::move( parser.entries ), parser.nCols,
parser.nRows, true },
std::move( parser.rowlhs ), std::move( parser.rowrhs ),
std::move( parser.row_flags ), true );
problem.setVariableDomains( std::move( parser.lb4cols ),
std::move( parser.ub4cols ),
std::move( parser.col_flags ) );
problem.setVariableNames( std::move( parser.colnames ) );
problem.setName( std::move( filename ) );
problem.setConstraintNames( std::move( parser.rownames ) );
problem.setInputTolerance(
REAL{ pow( typename RealParseType::type{ 10 },
-std::numeric_limits<
typename RealParseType::type>::digits10 ) } );
return problem;
}
private:
MpsParser() {}
/// load LP from MPS file as transposed triplet matrix
bool
parseFile( const std::string& filename );
bool
parse( boost::iostreams::filtering_istream& file );
enum class boundtype
{
kLE,
kEq,
kGE
};
enum class parsekey
{
kRows,
kCols,
kRhs,
kRanges,
kBounds,
kNone,
kEnd,
kFail,
kComment
};
void
printErrorMessage( parsekey keyword )
{
switch( keyword )
{
case parsekey::kRows:
std::cerr << "read error in section ROWS " << std::endl;
break;
case parsekey::kCols:
std::cerr << "read error in section COLUMNS " << std::endl;
break;
case parsekey::kRhs:
std::cerr << "read error in section RHS " << std::endl;
break;
case parsekey::kBounds:
std::cerr << "read error in section BOUNDS " << std::endl;
break;
case parsekey::kRanges:
std::cerr << "read error in section RANGES " << std::endl;
break;
default:
std::cerr << "undefined read error " << std::endl;
break;
}
};
/*
* data for mps problem
*/
Vec> entries;
Vec> coeffobj;
Vec rowlhs;
Vec rowrhs;
Vec rownames;
Vec colnames;
HashMap rowname2idx;
HashMap colname2idx;
Vec lb4cols;
Vec ub4cols;
Vec row_type;
Vec row_flags;
Vec col_flags;
REAL objoffset = 0;
int nCols = 0;
int nRows = 0;
int nnz = -1;
/// checks first word of strline and wraps it by it_begin and it_end
parsekey
checkFirstWord( std::string& strline, std::string::iterator& it,
boost::string_ref& word_ref ) const;
parsekey
parseDefault( boost::iostreams::filtering_istream& file ) const;
parsekey
parseRows( boost::iostreams::filtering_istream& file,
Vec& rowtype );
parsekey
parseCols( boost::iostreams::filtering_istream& file,
const Vec& rowtype );
parsekey
parseRhs( boost::iostreams::filtering_istream& file );
parsekey
parseRanges( boost::iostreams::filtering_istream& file );
parsekey
parseBounds( boost::iostreams::filtering_istream& file );
};
template
typename MpsParser::parsekey
MpsParser::checkFirstWord( std::string& strline,
std::string::iterator& it,
boost::string_ref& word_ref ) const
{
using namespace boost::spirit;
it = strline.begin() + strline.find_first_not_of( " " );
std::string::iterator it_start = it;
// TODO: Daniel
qi::parse( it, strline.end(), qi::lexeme[+qi::graph] );
const std::size_t length = std::distance( it_start, it );
boost::string_ref word( &( *it_start ), length );
word_ref = word;
if( word.front() == 'R' ) // todo
{
if( word == "ROWS" )
return MpsParser::parsekey::kRows;
else if( word == "RHS" )
return MpsParser::parsekey::kRhs;
else if( word == "RANGES" )
return MpsParser::parsekey::kRanges;
else
return MpsParser::parsekey::kNone;
}
else if( word == "COLUMNS" )
return MpsParser::parsekey::kCols;
else if( word == "BOUNDS" )
return MpsParser::parsekey::kBounds;
else if( word == "ENDATA" )
return MpsParser::parsekey::kEnd;
else
return MpsParser::parsekey::kNone;
}
template
typename MpsParser::parsekey
MpsParser::parseDefault( boost::iostreams::filtering_istream& file ) const
{
std::string strline;
getline( file, strline );
std::string::iterator it;
boost::string_ref word_ref;
return checkFirstWord( strline, it, word_ref );
}
template
typename MpsParser::parsekey
MpsParser::parseRows( boost::iostreams::filtering_istream& file,
Vec& rowtype )
{
using namespace boost::spirit;
std::string strline;
size_t nrows = 0;
bool hasobj = false;
while( getline( file, strline ) )
{
bool isobj = false;
std::string::iterator it;
boost::string_ref word_ref;
MpsParser::parsekey key = checkFirstWord( strline, it, word_ref );
// start of new section?
if( key != parsekey::kNone )
{
nRows = int( nrows );
if( !hasobj )
{
std::cout << "WARNING: no objective row found" << std::endl;
rowname2idx.emplace( "artificial_empty_objective", -1 );
}
return key;
}
if( word_ref.front() == 'G' )
{
rowlhs.push_back( REAL{ 0.0 } );
rowrhs.push_back( REAL{ 0.0 } );
row_flags.emplace_back( RowFlag::kRhsInf );
rowtype.push_back( boundtype::kGE );
}
else if( word_ref.front() == 'E' )
{
rowlhs.push_back( REAL{ 0.0 } );
rowrhs.push_back( REAL{ 0.0 } );
row_flags.emplace_back( RowFlag::kEquation );
rowtype.push_back( boundtype::kEq );
}
else if( word_ref.front() == 'L' )
{
rowlhs.push_back( REAL{ 0.0 } );
rowrhs.push_back( REAL{ 0.0 } );
row_flags.emplace_back( RowFlag::kLhsInf );
rowtype.push_back( boundtype::kLE );
}
// todo properly treat multiple free rows
else if( word_ref.front() == 'N' )
{
if( hasobj )
{
rowlhs.push_back( REAL{ 0.0 } );
rowrhs.push_back( REAL{ 0.0 } );
RowFlags rowf;
rowf.set( RowFlag::kLhsInf, RowFlag::kRhsInf );
row_flags.emplace_back( rowf );
rowtype.push_back( boundtype::kLE );
}
else
{
isobj = true;
hasobj = true;
}
}
else if( word_ref.empty() ) // empty line
continue;
else
return parsekey::kFail;
std::string rowname = ""; // todo use ref
// get row name
qi::phrase_parse( it, strline.end(), qi::lexeme[+qi::graph], ascii::space,
rowname ); // todo use ref
// todo whitespace in name possible?
auto ret = rowname2idx.emplace( rowname, isobj ? ( -1 ) : ( nrows++ ) );
if( !isobj )
rownames.push_back( rowname );
if( !ret.second )
{
std::cerr << "duplicate row " << rowname << std::endl;
return parsekey::kFail;
}
}
return parsekey::kFail;
}
template
typename MpsParser::parsekey
MpsParser::parseCols( boost::iostreams::filtering_istream& file,
const Vec& rowtype )
{
using namespace boost::spirit;
std::string colname = "";
std::string strline;
int rowidx;
int ncols = 0;
int colstart = 0;
bool integral_cols = false;
auto parsename = [&rowidx, this]( std::string name ) {
auto mit = rowname2idx.find( name );
assert( mit != rowname2idx.end() );
rowidx = mit->second;
if( rowidx >= 0 )
this->nnz++;
else
assert( -1 == rowidx );
};
auto addtuple = [&rowidx, &ncols,
this]( typename RealParseType::type coeff ) {
if( rowidx >= 0 )
entries.push_back(
std::make_tuple( ncols - 1, rowidx, REAL{ coeff } ) );
else
coeffobj.push_back( std::make_pair( ncols - 1, REAL{ coeff } ) );
};
while( getline( file, strline ) )
{
std::string::iterator it;
boost::string_ref word_ref;
MpsParser::parsekey key = checkFirstWord( strline, it, word_ref );
// start of new section?
if( key != parsekey::kNone )
{
if( ncols > 1 )
pdqsort( entries.begin() + colstart, entries.end(),
[]( Triplet a, Triplet b ) {
return std::get<1>( b ) > std::get<1>( a );
} );
return key;
}
// check for integrality marker
std::string marker = ""; // todo use ref
std::string::iterator it2 = it;
qi::phrase_parse( it2, strline.end(), qi::lexeme[+qi::graph],
ascii::space, marker );
if( marker == "'MARKER'" )
{
marker = "";
qi::phrase_parse( it2, strline.end(), qi::lexeme[+qi::graph],
ascii::space, marker );
if( ( integral_cols && marker != "'INTEND'" ) ||
( !integral_cols && marker != "'INTORG'" ) )
{
std::cerr << "integrality marker error " << std::endl;
return parsekey::kFail;
}
integral_cols = !integral_cols;
continue;
}
// new column?
if( !( word_ref == colname ) )
{
if( word_ref.empty() ) // empty line
continue;
colname = word_ref.to_string();
auto ret = colname2idx.emplace( colname, ncols++ );
colnames.push_back( colname );
if( !ret.second )
{
std::cerr << "duplicate column " << std::endl;
return parsekey::kFail;
}
assert( lb4cols.size() == col_flags.size() );
col_flags.emplace_back( integral_cols ? ColFlag::kIntegral
: ColFlag::kNone );
// initialize with default bounds
if( integral_cols )
{
lb4cols.push_back( REAL{ 0.0 } );
ub4cols.push_back( REAL{ 1.0 } );
}
else
{
lb4cols.push_back( REAL{ 0.0 } );
ub4cols.push_back( REAL{ 0.0 } );
col_flags.back().set( ColFlag::kUbInf );
}
assert( col_flags.size() == lb4cols.size() );
if( ncols > 1 )
pdqsort( entries.begin() + colstart, entries.end(),
[]( Triplet a, Triplet b ) {
return std::get<1>( b ) > std::get<1>( a );
} );
colstart = entries.size();
}
assert( ncols > 0 );
if( !qi::phrase_parse(
it, strline.end(),
+( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] >>
qi::real_parser::type>()[(
addtuple )] ),
ascii::space ) )
return parsekey::kFail;
}
return parsekey::kFail;
}
template
typename MpsParser::parsekey
MpsParser::parseRanges( boost::iostreams::filtering_istream& file )
{
using namespace boost::spirit;
std::string strline;
assert( rowrhs.size() == rowlhs.size() );
while( getline( file, strline ) )
{
std::string::iterator it;
boost::string_ref word_ref;
MpsParser::parsekey key = checkFirstWord( strline, it, word_ref );
// start of new section?
if( key != parsekey::kNone && key != parsekey::kRanges )
return key;
if( word_ref.empty() )
continue;
int rowidx;
auto parsename = [&rowidx, this]( std::string name ) {
auto mit = rowname2idx.find( name );
assert( mit != rowname2idx.end() );
rowidx = mit->second;
assert( rowidx >= 0 && rowidx < nRows );
};
auto addrange = [&rowidx,
this]( typename RealParseType::type val ) {
assert( size_t( rowidx ) < rowrhs.size() );
if( row_type[rowidx] == boundtype::kGE )
{
row_flags[rowidx].unset( RowFlag::kRhsInf );
rowrhs[rowidx] = rowlhs[rowidx] + REAL(abs( val ));
}
else if( row_type[rowidx] == boundtype::kLE )
{
row_flags[rowidx].unset( RowFlag::kLhsInf );
rowlhs[rowidx] = rowrhs[rowidx] - REAL(abs( val ));
}
else
{
assert( row_type[rowidx] == boundtype::kEq );
assert( rowrhs[rowidx] == rowlhs[rowidx] );
assert( row_flags[rowidx].test(RowFlag::kEquation) );
if( val > REAL{ 0.0 } )
{
row_flags[rowidx].unset( RowFlag::kEquation );
rowrhs[rowidx] = rowrhs[rowidx] + REAL( val );
}
else if( val < REAL{ 0.0 } )
{
rowlhs[rowidx] = rowlhs[rowidx] + REAL( val );
row_flags[rowidx].unset( RowFlag::kEquation );
}
}
};
// compulsory part
if( !qi::phrase_parse(
it, strline.end(),
+( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] >>
qi::real_parser::type>()[(
addrange )] ),
ascii::space ) )
return parsekey::kFail;
// optional part todo don't replicate code
qi::phrase_parse(
it, strline.end(),
+( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] >>
qi::real_parser::type>()[(
addrange )] ),
ascii::space );
}
return parsekey::kFail;
}
template
typename MpsParser::parsekey
MpsParser::parseRhs( boost::iostreams::filtering_istream& file )
{
using namespace boost::spirit;
std::string strline;
while( getline( file, strline ) )
{
std::string::iterator it;
boost::string_ref word_ref;
MpsParser::parsekey key = checkFirstWord( strline, it, word_ref );
// start of new section?
if( key != parsekey::kNone && key != parsekey::kRhs )
return key;
if( word_ref.empty() )
continue;
int rowidx;
auto parsename = [&rowidx, this]( std::string name ) {
auto mit = rowname2idx.find( name );
assert( mit != rowname2idx.end() );
rowidx = mit->second;
assert( rowidx >= -1 );
assert( rowidx < nRows );
};
auto addrhs = [&rowidx, this]( typename RealParseType::type val ) {
if( rowidx == -1 )
{
objoffset = -REAL{ val };
return;
}
if( row_type[rowidx] == boundtype::kEq ||
row_type[rowidx] == boundtype::kLE )
{
assert( size_t( rowidx ) < rowrhs.size() );
rowrhs[rowidx] = REAL{ val };
row_flags[rowidx].unset( RowFlag::kRhsInf );
}
if( row_type[rowidx] == boundtype::kEq ||
row_type[rowidx] == boundtype::kGE )
{
assert( size_t( rowidx ) < rowlhs.size() );
rowlhs[rowidx] = REAL{ val };
row_flags[rowidx].unset( RowFlag::kLhsInf );
}
};
// Documentation Link to qi:
// https://www.boost.org/doc/libs/1_66_0/libs/spirit/doc/html/spirit/qi/tutorials/warming_up.html
// +: Parse a one or more times
// lexeme[a]: Disable skip parsing for a, does pre-skipping
// as_string: Force atomic assignment for string attributes
// graph: Matches a character based on the equivalent of std::isgraph in the current character set
if( !qi::phrase_parse(
it, strline.end(),
+( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] >>
qi::real_parser::type>()[(
addrhs )] ),
ascii::space ) )
return parsekey::kFail;
}
return parsekey::kFail;
}
template
typename MpsParser::parsekey
MpsParser::parseBounds( boost::iostreams::filtering_istream& file )
{
using namespace boost::spirit;
std::string strline;
Vec ub_is_default( lb4cols.size(), true );
Vec lb_is_default( lb4cols.size(), true );
while( getline( file, strline ) )
{
std::string::iterator it;
boost::string_ref word_ref;
MpsParser::parsekey key = checkFirstWord( strline, it, word_ref );
// start of new section?
if( key != parsekey::kNone )
return key;
if( word_ref.empty() )
continue;
bool islb = false;
bool isub = false;
bool isintegral = false;
bool isdefaultbound = false;
if( word_ref == "UP" ) // lower bound
isub = true;
else if( word_ref == "LO" ) // upper bound
islb = true;
else if( word_ref == "FX" ) // fixed
{
islb = true;
isub = true;
}
else if( word_ref == "MI" ) // infinite lower bound
{
islb = true;
isdefaultbound = true;
}
else if( word_ref == "PL" ) // infinite upper bound (redundant)
{
isub = true;
isdefaultbound = true;
}
else if( word_ref == "BV" ) // binary
{
isintegral = true;
isdefaultbound = true;
islb = true;
isub = true;
}
else if( word_ref == "LI" ) // integer lower bound
{
islb = true;
isintegral = true;
}
else if( word_ref == "UI" ) // integer upper bound
{
isub = true;
isintegral = true;
}
else if( word_ref == "FR" ) // free variable
{
islb = true;
isub = true;
isdefaultbound = true;
}
else
{
std::cerr << "unknown bound type " << word_ref << std::endl;
exit( 1 );
}
// parse over next word
qi::phrase_parse( it, strline.end(), qi::lexeme[+qi::graph],
ascii::space );
int colidx;
auto parsename = [&colidx, this]( std::string name ) {
auto mit = colname2idx.find( name );
assert( mit != colname2idx.end() );
colidx = mit->second;
assert( colidx >= 0 );
};
if( isdefaultbound )
{
if( !qi::phrase_parse(
it, strline.end(),
( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] ),
ascii::space ) )
return parsekey::kFail;
if( isintegral ) // binary
{
if( islb )
lb4cols[colidx] = REAL{ 0.0 };
if( isub )
{
col_flags[colidx].unset( ColFlag::kUbInf );
ub4cols[colidx] = REAL{ 1.0 };
}
col_flags[colidx].set( ColFlag::kIntegral );
}
else
{
if( islb )
col_flags[colidx].set( ColFlag::kLbInf );
if( isub )
col_flags[colidx].set( ColFlag::kUbInf );
}
continue;
}
if( !qi::phrase_parse(
it, strline.end(),
+( qi::lexeme[qi::as_string[+qi::graph][( parsename )]] >>
qi::real_parser::type>()[(
[&ub_is_default, &lb_is_default, &colidx, &islb, &isub,
&isintegral,
this]( typename RealParseType::type val ) {
if( islb )
{
lb4cols[colidx] = REAL{ val };
lb_is_default[colidx] = false;
col_flags[colidx].unset( ColFlag::kLbInf );
}
if( isub )
{
ub4cols[colidx] = REAL{ val };
ub_is_default[colidx] = false;
col_flags[colidx].unset( ColFlag::kUbInf );
}
if( isintegral )
col_flags[colidx].set( ColFlag::kIntegral );
if( col_flags[colidx].test( ColFlag::kIntegral ) )
{
col_flags[colidx].set( ColFlag::kIntegral );
if( !islb && lb_is_default[colidx] )
lb4cols[colidx] = REAL{ 0.0 };
if( !isub && ub_is_default[colidx] )
col_flags[colidx].set( ColFlag::kUbInf );
}
} )] ),
ascii::space ) )
return parsekey::kFail;
}
return parsekey::kFail;
}
template
bool
MpsParser::parseFile( const std::string& filename )
{
std::ifstream file( filename, std::ifstream::in );
boost::iostreams::filtering_istream in;
if( !file )
return false;
#ifdef PAPILO_USE_BOOST_IOSTREAMS_WITH_ZLIB
if( boost::algorithm::ends_with( filename, ".gz" ) )
in.push( boost::iostreams::gzip_decompressor() );
#endif
#ifdef PAPILO_USE_BOOST_IOSTREAMS_WITH_BZIP2
if( boost::algorithm::ends_with( filename, ".bz2" ) )
in.push( boost::iostreams::bzip2_decompressor() );
#endif
in.push( file );
return parse( in );
}
template
bool
MpsParser::parse( boost::iostreams::filtering_istream& file )
{
nnz = 0;
parsekey keyword = parsekey::kNone;
parsekey keyword_old = parsekey::kNone;
// parsing loop
while( keyword != parsekey::kFail && keyword != parsekey::kEnd &&
!file.eof() && file.good() )
{
keyword_old = keyword;
switch( keyword )
{
case parsekey::kRows:
keyword = parseRows( file, row_type );
break;
case parsekey::kCols:
keyword = parseCols( file, row_type );
break;
case parsekey::kRhs:
keyword = parseRhs( file );
break;
case parsekey::kRanges:
keyword = parseRanges( file );
break;
case parsekey::kBounds:
keyword = parseBounds( file );
break;
case parsekey::kFail:
break;
default:
keyword = parseDefault( file );
break;
}
}
if( keyword == parsekey::kFail || keyword != parsekey::kEnd )
{
printErrorMessage( keyword_old );
return false;
}
assert( row_type.size() == unsigned( nRows ) );
nCols = colname2idx.size();
nRows = rowname2idx.size() - 1; // subtract obj row
return true;
}
} // namespace papilo
#endif /* _PARSING_MPS_PARSER_HPP_ */