Home

Awesome

Unify :link: <a href="https://travis-ci.org/r-lyeh/unify"><img src="https://api.travis-ci.org/r-lyeh/unify.svg?branch=master" align="right" /></a>

Unify is a C++11 function to normalize resource identifiers.

Unify transforms any physical resource string to a unified string, called UID (Unified ID). Any absolute, relative, virtual and/or network path, URI, URL or ID will transform to a UID. Basically unify(src) does a string transformation from the given string to a sorted [a-zA-Z0-9-]+ pattern, which is guaranteed to remain immutable (to a high degree) in code, even if the physical source is altered externally.

Features

Public API

// Convert anything to a UID
// Additionally, if tags != null, push back all parsed tags found
string unify( const string &uri, vector<string> *tags = 0 );

Quick tutorial TL;DR

// unified folder/asset separators
std::string test = unify("folder\\asset");
assert( test == unify("folder/asset") );
assert( test == unify("folder-asset") );
assert( test == unify("folder|asset") );
assert( test == unify("folder:asset") );
assert( test == unify("folder;asset") );
assert( test == unify("folder,asset") );
assert( test == unify("[folder]asset") );
assert( test == unify("asset(folder)") );
// -> asset_folder

// unified absolute, relative, virtual and remote paths
test = unify("~home/game/folder/asset.jpg");
assert( test == unify("~user/game1/folder/asset.jpg") );
assert( test == unify("~mark/game2/folder/asset.jpg") );
assert( test == unify("~john/game3/data/folder/asset.jpg") );
assert( test == unify("../folder/asset.jpg") );
assert( test == unify("C:\\data\\folder\\asset.jpg") );
assert( test == unify("C:/game/data/folder/asset.jpg") );
assert( test == unify("data.zip/data/folder/asset.jpg") );
assert( test == unify("virtual.rar/folder/asset.jpg") );
assert( test == unify("http://web.domain.com%20/folder/asset.jpg?blabla=123&abc=123#qwe") );
// -> asset_folder

// unified uppercases, lowercases, whitespaces and hyphens
assert( unify("mesh/main-character") == "character_main_mesh" );
assert( unify("mesh/main_character") == "character_main_mesh" );
assert( unify("mesh/Main Character") == "character_main_mesh" );
assert( unify("mesh / Main  character ") == "character_main_mesh" );
// -> character_main_mesh

// unified extensions
assert( unify("music/theme.ogg") == "music_theme" );
assert( unify("music/theme.wav") == "music_theme" );
assert( unify("ui/logo.png") == "logo_ui" );
assert( unify("ui/logo.webp") == "logo_ui" );
// -> music_theme, -> logo_ui

// unified typos on double extensions and double punctuations
assert( unify("game/logo.bmp.png") == unify("game/logo.bmp") );
assert( unify("game/logo.png") == unify("game/logo..png") );
// -> game_logo

// unified typos on many diacritics
assert( unify("âñimátïón/wàlk") == unify("animation/walk") );
// -> animation_walk

// unified AoS (OO) and SoA (ECS) disk layouts
// unified plurals as well (if using English words)
assert( unify("sounds/kid")  == unify("kid/sound") );
assert( unify("sprites/kid") == unify("kid/sprite") );
assert( unify("sounds/car")  == unify("car/sound") );
assert( unify("sprites/car") == unify("car/sprite") );
// -> car_sound, car_sprite, kid_sound, kid_sprite

// unified SOV, SVO, VSO, VOS, OVS, OSV subject/verb/object language topologies
test = unify("player-joins-scene.intro");
assert( test == unify("player-scene-join.intro") );
assert( test == unify("join-player-scene.intro") );
assert( test == unify("join-scene-player.intro") );
assert( test == unify("scene-join-player.intro") );
assert( test == unify("scene-player-join.intro") );
// -> join_player_scene

// unified tagging (useful when globbing and deploying files and/or directories)
test = unify("splash/logo");
assert( unify("/splash/#win32/logo") == test );
assert( unify("splash #mobile/logo #win32=always.png") == test );
// -> logo_splash

// unified consistency. reunification as a lossless process
assert( unify( unify("roses-are-red") ) == unify("roses-are-red") );
// -> are_red_rose

Showcase

#include <map>
#include <string>
#include "unify.hpp"

// Minimal filesystem dictionary: every registered URI is stored under its unified ID,
// so any spelling that unifies to the same UID resolves to the registered URI.
struct disk {
    // UID (as returned by unify) -> URI exactly as it was passed to add()
    std::map< std::string, std::string > map;

    // Registers a URI. An entry already stored under the same UID is overwritten.
    void add( const std::string &uri ) {
        const std::string uid = unify(uri);
        map[ uid ] = uri;
    }

    // Resolves a UID or URI to the registered URI.
    // Returns an empty string when nothing is stored under the unified key.
    std::string lookup( const std::string &uid_or_uri ) const {
        const auto entry = map.find( unify(uid_or_uri) );
        if( entry == map.end() ) {
            return std::string();
        }
        return entry->second;
    }
};

// Showcase driver: registers a handful of physical paths in a disk dictionary,
// then checks that several alternative spellings of each asset resolve to the
// exact path that was registered.
int main() {
    disk d;
    d.add("./local/file.txt");
    d.add("./data/game/icon.png");
    d.add("./songs/main_theme.ogg");
    d.add("./game.zip/json #win32/inventory.json");
    d.add("./game.zip/logos #win32/big.webp");
    d.add("./game.zip/logos #mobile/small.png");
    /* pseudocode:
    for( all mounted filesystems ) {
        for( all monitored files in subdirs ) {
            d.add( file.full_path );
        }
    } */
    // these virtual<->physical uris are now equivalent
    assert( d.lookup("local/file") == "./local/file.txt" );
    assert( d.lookup("local-file") == "./local/file.txt" );
    assert( d.lookup("file-local") == "./local/file.txt" );
    assert( d.lookup("../file/local") == "./local/file.txt" );
    assert( d.lookup("game/icon") == "./data/game/icon.png" );
    assert( d.lookup("game-icon") == "./data/game/icon.png" );
    assert( d.lookup("icon/game") == "./data/game/icon.png" );
    assert( d.lookup("icon-game") == "./data/game/icon.png" );
    assert( d.lookup("songs/main-theme") == "./songs/main_theme.ogg" );
    assert( d.lookup("inventory-json") == "./game.zip/json #win32/inventory.json" );
    assert( d.lookup("logos-big") == "./game.zip/logos #win32/big.webp" );
    // the #mobile logo registered above must be reachable under its own name too
    assert( d.lookup("logos-small") == "./game.zip/logos #mobile/small.png" );
}
tools/7za a -tzip common.zip *#all*
tools/7za a -tzip win32.zip *#w32*
tools/7za a -tzip iphones.zip *#iphone*
tools/7za a -tzip xmas.zip *#xmas*
tools/7za a -tzip halloween.zip *#halloween*
wget -N http://website.com/dlc/common.zip
wget -N http://website.com/dlc/xmas.zip
wget -N http://website.com/dlc/halloween.zip
[ "$PROFILE" == "WIN32"  ] && wget -N http://website.com/dlc/win32.zip
[ "$PROFILE" == "IPHONE" ] && wget -N http://website.com/dlc/iphones.zip
dir *#3* && dir *#4*
dir *#iphone5*#textures*

Appendix: On transformation

The reference implementation performs the transformation as follows:

  1. Latinization (utf8)
  2. Remove diacritics (utf8)
  3. Unescape URL (utf8)
  4. Remove url options (if any)
  5. Lowercase contents
  6. Strip tags in #tag-123, #tag_456 or #xbox360=yes format.
  7. Split path up to 2nd level.
  8. Trim extensions and punctuators (if any).
  9. Replace whitespaces with - hyphens.
  10. Split string into tokens (with - hyphen separator).
  11. Sort tokens array.
  12. For every token, fix aos/soa plural (if any).
  13. Join stems with - hyphen separator.

Appendix: Full tagging proposal

name (#platforms)(#factories)(#contexts)(#alias)(#type)(#version)

A possible proposal for a family of optional tags for any UID would be:

Note: there are no reserved keywords in Unify. Name tags are application/project/company dependent and have to be defined in advance.

Changelog