MetadataParser.cpp 20.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*****************************************************************************
 * Media Library
 *****************************************************************************
 * Copyright (C) 2015 Hugo Beauzée-Luyssen, Videolabs
 *
 * Authors: Hugo Beauzée-Luyssen<hugo@beauzee.fr>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

23 24 25 26
#if HAVE_CONFIG_H
# include "config.h"
#endif

27 28 29 30 31
#include "MetadataParser.h"
#include "Album.h"
#include "AlbumTrack.h"
#include "Artist.h"
#include "File.h"
32 33 34
#include "filesystem/IDevice.h"
#include "filesystem/IDirectory.h"
#include "Folder.h"
35
#include "Genre.h"
36
#include "Media.h"
37
#include "Playlist.h"
38 39
#include "Show.h"
#include "utils/Filename.h"
40
#include "utils/ModificationsNotifier.h"
41
#include <cstdlib>
42

43 44 45
namespace medialibrary
{

46 47 48 49 50
/// Builds a parser with no cached album context.
/// The previous folder id starts at 0, the value findAlbum() treats as
/// "no previous folder known".
MetadataParser::MetadataParser()
    : m_previousFolderId( 0 )
{
}

51 52
bool MetadataParser::initialize()
{
53
    m_unknownArtist = Artist::fetch( m_ml, UnknownArtistID );
54 55 56 57 58
    if ( m_unknownArtist == nullptr )
        LOG_ERROR( "Failed to cache unknown artist" );
    return m_unknownArtist != nullptr;
}

59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Reads a metadata field and converts it to an integer.
/// \param vlcMedia The media to read the metadata from
/// \param meta The metadata field to read
/// \param name A human readable name for the field, only used in the warning message
/// \return The parsed value, or 0 when the field is empty or cannot be parsed
int MetadataParser::toInt( VLC::Media& vlcMedia, libvlc_meta_t meta, const char* name )
{
    const auto value = vlcMedia.meta( meta );
    if ( value.empty() == true )
        return 0;
    try
    {
        return std::stoi( value );
    }
    catch( const std::logic_error& ex )
    {
        // std::stoi reports both unparsable and out of range input through
        // exceptions deriving from std::logic_error
        LOG_WARN( "Invalid ", name, " provided (", value, "): ", ex.what() );
    }
    return 0;
}

76 77
/// Runs the metadata analysis step on a task.
///
/// When the task has no file yet, a media and its main file are created first.
/// The tracks reported by VLC are then stored, and the media is handed over to
/// the audio or video specific parsing depending on whether a video track was seen.
///
/// \param task The task to process
/// \return Status::Success when the step completed or was delegated to the
///         thumbnailer, Status::Fatal on any failure.
parser::Task::Status MetadataParser::run( parser::Task& task )
{
    if ( task.file == nullptr )
    {
        assert( task.media == nullptr );
        auto t = m_ml->getConn()->newTransaction();
        LOG_INFO( "Adding ", task.mrl );
        task.media = Media::create( m_ml, IMedia::Type::Unknown, utils::file::fileName( task.mrl ) );
        if ( task.media == nullptr )
        {
            LOG_ERROR( "Failed to add media ", task.mrl, " to the media library" );
            return parser::Task::Status::Fatal;
        }
        // For now, assume all media are made of a single file
        task.file = task.media->addFile( *task.fileFs, task.parentFolder->id(),
                                         task.parentFolderFs->device()->isRemovable(),
                                         File::Type::Main );
        if ( task.file == nullptr )
        {
            LOG_ERROR( "Failed to add file ", task.mrl, " to media #", task.media->id() );
            return parser::Task::Status::Fatal;
        }
        t->commit();
        // Synchronize file step tracker with task
        task.markStepCompleted( static_cast<parser::Task::ParserStep>( task.step ) );
    }

    const auto& tracks = task.vlcMedia.tracks();

    // If we failed to extract any tracks, don't make any assumption and forward to the
    // thumbnailer. Since it starts an actual playback, it will have more information.
    // Since the metadata steps won't be marked, it will run again once the thumbnailer has completed.
    if ( tracks.empty() == true )
    {
        // However, if the file is not unknown anymore, it means the thumbnailer has already processed it
        if ( task.media->type() == Media::Type::Unknown )
        {
            // In case the thumbnailer ran before, but the application exited, we would skip the
            // thumbnailer execution, coming back here, and delegating again to the thumbnailer
            // over and over again. We need to ensure the thumbnailer will run, even partially, up to
            // the point the playback started.
            task.markStepUncompleted( parser::Task::ParserStep::Thumbnailer );
            LOG_INFO( "Skipping metadata parsing for file with unknown type: ", task.file->mrl() );
            return parser::Task::Status::Success;
        }
        // In that case, stop trying to do something with this file.
        return parser::Task::Status::Fatal;
    }

    // A media is considered audio until a video track is encountered
    bool isAudio = true;
    {
        using TracksT = decltype( tracks );
        sqlite::Tools::withRetries( 3, [this, &isAudio, &task]( TracksT tracks ) {
            auto t = m_ml->getConn()->newTransaction();
            for ( const auto& track : tracks )
            {
                auto codec = track.codec();
                std::string fcc( reinterpret_cast<const char*>( &codec ), 4 );
                if ( track.type() == VLC::MediaTrack::Type::Video )
                {
                    // The frame rate denominator is 0 when the frame rate is not known.
                    // Don't divide by it, and store 0 as the frame rate instead.
                    const auto fpsDen = track.fpsDen();
                    const float fps = fpsDen != 0
                            ? static_cast<float>( track.fpsNum() ) / static_cast<float>( fpsDen )
                            : 0.0f;
                    task.media->addVideoTrack( fcc, track.width(), track.height(), fps,
                                          track.language(), track.description() );
                    isAudio = false;
                }
                else if ( track.type() == VLC::MediaTrack::Type::Audio )
                {
                    task.media->addAudioTrack( fcc, track.bitrate(), track.rate(), track.channels(),
                                          track.language(), track.description() );
                }
            }
            task.media->setDuration( task.vlcMedia.duration() );
            t->commit();
        }, std::move( tracks ) );
    }
    if ( isAudio == true )
    {
        if ( parseAudioFile( task ) == false )
            return parser::Task::Status::Fatal;
    }
    else
    {
        if (parseVideoFile( task ) == false )
            return parser::Task::Status::Fatal;
    }

    if ( task.file->isDeleted() == true || task.media->isDeleted() == true )
        return parser::Task::Status::Fatal;

    task.markStepCompleted( parser::Task::ParserStep::MetadataAnalysis );
    // Save ourselves from the useless processing of a thumbnail later if
    // we're analyzing an audio file
    if ( isAudio == true && utils::file::schemeIs( "attachment://", task.media->thumbnail() ) == false )
        task.markStepCompleted( parser::Task::ParserStep::Thumbnailer );
    if ( task.file->saveParserStep() == false )
        return parser::Task::Status::Fatal;
    m_notifier->notifyMediaCreation( task.media );
    return parser::Task::Status::Success;
}

/* Video files */

178
/// Handles the video specific part of the metadata analysis.
///
/// The media is flagged as a video. When it has a title, the media is saved,
/// and when it is also tagged with a show name, the show is fetched (or created)
/// and the media is registered as one of its episodes, provided an episode
/// number is available.
///
/// \param task The current parser task
/// \return false if the show could not be created, otherwise the result of
///         the retried transaction (true when no title is available).
bool MetadataParser::parseVideoFile( parser::Task& task ) const
{
    task.media->setType( IMedia::Type::Video );
    const auto& title = task.vlcMedia.meta( libvlc_meta_Title );
    if ( title.length() == 0 )
        return true;

    const auto& showName = task.vlcMedia.meta( libvlc_meta_ShowName );
    return sqlite::Tools::withRetries( 3, [this, &showName, &title, &task]() {
        auto t = m_ml->getConn()->newTransaction();

        // Only handle the show when there is a show name to work with.
        // Looking up or creating a show from an empty name is meaningless.
        if ( showName.empty() == false )
        {
            auto show = m_ml->show( showName );
            if ( show == nullptr )
            {
                show = m_ml->createShow( showName );
                if ( show == nullptr )
                    return false;
            }
            auto episode = toInt( task.vlcMedia, libvlc_meta_Episode, "episode number" );
            if ( episode != 0 )
            {
                std::shared_ptr<Show> s = std::static_pointer_cast<Show>( show );
                s->addEpisode( *task.media, title, episode );
            }
        }
        else
        {
            // How do we know if it's a movie or a random video?
        }
        task.media->save();
        t->commit();
        return true;
    });
}

/* Audio files */

219
/// Handles the audio specific part of the metadata analysis.
///
/// The media is flagged as audio, its artwork is stored as a thumbnail, and its
/// genre, artists, album and album track are fetched or created, then linked together.
///
/// \param task The current parser task
/// \return false if no artist is available, or if the album or the album track
///         couldn't be created. Otherwise, the result of link().
bool MetadataParser::parseAudioFile( parser::Task& task )
{
    task.media->setType( IMedia::Type::Audio );

    auto artworkMrl = task.vlcMedia.meta( libvlc_meta_ArtworkURL );
    if ( artworkMrl.empty() == false )
    {
        task.media->setThumbnail( artworkMrl );
        // Don't use an attachment as default artwork for album/artists
        if ( utils::file::schemeIs( "attachment", artworkMrl ) )
            artworkMrl.clear();
    }

    auto genre = handleGenre( task );
    auto artists = findOrCreateArtist( task );
    if ( artists.first == nullptr && artists.second == nullptr )
        return false;
    auto album = findAlbum( task, artists.first, artists.second );
    return sqlite::Tools::withRetries( 3, [this, &task, &artists]( std::string artworkMrl,
                                                  std::shared_ptr<Album> album, std::shared_ptr<Genre> genre ) {
        auto t = m_ml->getConn()->newTransaction();
        if ( album == nullptr )
        {
            const auto& albumName = task.vlcMedia.meta( libvlc_meta_Album );
            album = m_ml->createAlbum( albumName, artworkMrl );
            if ( album == nullptr )
                return false;
            m_notifier->notifyAlbumCreation( album );
        }
        // If we know a track artist, specify it, otherwise, fallback to the album/unknown artist
        auto track = handleTrack( album, task, artists.second ? artists.second : artists.first,
                                  genre.get() );
        // Don't commit a media linked to an album it has no track in
        if ( track == nullptr )
            return false;

        auto res = link( *task.media, album, artists.first, artists.second );
        task.media->save();
        t->commit();
        return res;
    }, std::move( artworkMrl ), std::move( album ), std::move( genre ) );
}

260 261
/// Fetches the genre matching the media's genre tag, creating it if required.
/// \param task The current parser task
/// \return The genre, or nullptr if the media has no genre tag or if the genre
///         could neither be fetched nor created.
std::shared_ptr<Genre> MetadataParser::handleGenre( parser::Task& task ) const
{
    const auto& genreStr = task.vlcMedia.meta( libvlc_meta_Genre );
    if ( genreStr.length() == 0 )
        return nullptr;
    auto genre = Genre::fromName( m_ml, genreStr );
    if ( genre == nullptr )
    {
        genre = Genre::create( m_ml, genreStr );
        if ( genre == nullptr )
            LOG_ERROR( "Failed to get/create Genre ", genreStr );
    }
    return genre;
}

275
/* Album handling */
276 277

/// Looks for an already known album matching the media being parsed.
///
/// Without an album tag, the "unknown album" of the album artist, track artist
/// or unknown artist (in that order) is returned.
/// Otherwise, albums with the same title are fetched and filtered by album
/// artist and by folder, unless the album is believed to span multiple discs.
/// The last match is cached along with its folder, to speed up the processing
/// of the next tracks from the same folder.
///
/// \param task The current parser task
/// \param albumArtist The album artist, or nullptr if unknown
/// \param trackArtist The track artist, or nullptr if unknown
/// \return The matching album, or nullptr if there is none
std::shared_ptr<Album> MetadataParser::findAlbum( parser::Task& task, std::shared_ptr<Artist> albumArtist,
                                                    std::shared_ptr<Artist> trackArtist )
{
    const auto& albumName = task.vlcMedia.meta( libvlc_meta_Album );
    if ( albumName.empty() == true )
    {
        if ( albumArtist != nullptr )
            return albumArtist->unknownAlbum();
        else if ( trackArtist != nullptr )
            return trackArtist->unknownAlbum();
        return m_unknownArtist->unknownAlbum();
    }

    // Reuse the previous match if we're still processing the same album from the same folder
    if ( m_previousAlbum != nullptr && albumName == m_previousAlbum->title() &&
         m_previousFolderId != 0 && task.file->folderId() == m_previousFolderId )
        return m_previousAlbum;
    m_previousAlbum.reset();
    m_previousFolderId = 0;

    // Album matching depends on the difference between artist & album artist.
    // Candidates are fetched by title only, and filtered by album artist below.
    static const std::string req = "SELECT * FROM " + policy::AlbumTable::Name +
            " WHERE title = ?";
    auto albums = Album::fetchAll<Album>( m_ml, req, albumName );

    if ( albums.size() == 0 )
        return nullptr;

    const auto discTotal = toInt( task.vlcMedia, libvlc_meta_DiscTotal, "disc total" );
    const auto discNumber = toInt( task.vlcMedia, libvlc_meta_DiscNumber, "disc number" );
    /*
     * Even if we get only 1 album, we need to filter out invalid matches.
     * For instance, if we have already inserted an album "A" by an artist "john"
     * but we are now trying to handle an album "A" by an artist "doe", not filtering
     * candidates would yield the only "A" album we know, while we should return
     * nullptr, so the caller can create a new one.
     */
    for ( auto it = begin( albums ); it != end( albums ); )
    {
        auto a = (*it).get();
        if ( albumArtist != nullptr )
        {
            // We assume that an album without album artist is a positive match.
            // At the end of the day, without proper tags, there's only so much we can do.
            auto candidateAlbumArtist = a->albumArtist();
            if ( candidateAlbumArtist != nullptr && candidateAlbumArtist->id() != albumArtist->id() )
            {
                it = albums.erase( it );
                continue;
            }
        }
        // If this is a multidisc album, assume it could be in a multiple amount of folders.
        // Since folders can come in any order, we can't assume the first album will be the
        // first media we see. If the discTotal or discNumber meta are provided, that's easy. If not,
        // we assume that another CD with the same name & artists, and a disc number > 1
        // denotes a multi disc album
        // Check the first case early to avoid fetching tracks if unrequired.
        if ( discTotal > 1 || discNumber > 1 )
        {
            ++it;
            continue;
        }
        const auto tracks = a->cachedTracks();
        // If there is no tracks to compare with, we just have to hope this will be the only valid
        // album match
        if ( tracks.size() == 0 )
        {
            ++it;
            continue;
        }

        auto multiDisc = false;
        for ( auto& t : tracks )
        {
            auto at = t->albumTrack();
            assert( at != nullptr );
            if ( at != nullptr && at->discNumber() > 1 )
            {
                multiDisc = true;
                break;
            }
        }
        if ( multiDisc )
        {
            ++it;
            continue;
        }

        // Assume album files will be in the same folder.
        auto newFileFolder = utils::file::directory( task.file->mrl() );
        auto trackFiles = tracks[0]->files();
        bool excluded = false;
        for ( auto& f : trackFiles )
        {
            auto candidateFolder = utils::file::directory( f->mrl() );
            if ( candidateFolder != newFileFolder )
            {
                excluded = true;
                break;
            }
        }
        if ( excluded == true )
        {
            it = albums.erase( it );
            continue;
        }
        ++it;
    }
    if ( albums.size() == 0 )
        return nullptr;
    if ( albums.size() > 1 )
    {
        LOG_WARN( "Multiple candidates for album ", albumName, ". Selecting first one out of luck" );
    }
    m_previousFolderId = task.file->folderId();
    m_previousAlbum = albums[0];
    return albums[0];
}

///
/// \brief MetadataParser::handleArtists Returns Artist's involved on a track
399
/// \param task The current parser task
400 401 402 403
/// \return A pair containing:
/// The album artist as a first element
/// The track artist as a second element, or nullptr if it is the same as album artist
///
404
std::pair<std::shared_ptr<Artist>, std::shared_ptr<Artist>> MetadataParser::findOrCreateArtist( parser::Task& task ) const
405 406 407
{
    std::shared_ptr<Artist> albumArtist;
    std::shared_ptr<Artist> artist;
408
    static const std::string req = "SELECT * FROM " + policy::ArtistTable::Name + " WHERE name = ?";
409

410 411 412
    const auto& albumArtistStr = task.vlcMedia.meta( libvlc_meta_AlbumArtist );
    const auto& artistStr = task.vlcMedia.meta( libvlc_meta_Artist );
    if ( albumArtistStr.empty() == true && artistStr.empty() == true )
413
    {
414
        return {m_unknownArtist, m_unknownArtist};
415 416
    }

417
    if ( albumArtistStr.empty() == false )
418
    {
419
        albumArtist = Artist::fetch( m_ml, req, albumArtistStr );
420 421
        if ( albumArtist == nullptr )
        {
422
            albumArtist = m_ml->createArtist( albumArtistStr );
423 424
            if ( albumArtist == nullptr )
            {
425
                LOG_ERROR( "Failed to create new artist ", albumArtistStr );
426 427
                return {nullptr, nullptr};
            }
428
            m_notifier->notifyArtistCreation( albumArtist );
429 430
        }
    }
431
    if ( artistStr.empty() == false && artistStr != albumArtistStr )
432
    {
433
        artist = Artist::fetch( m_ml, req, artistStr );
434 435
        if ( artist == nullptr )
        {
436
            artist = m_ml->createArtist( artistStr );
437 438
            if ( artist == nullptr )
            {
439
                LOG_ERROR( "Failed to create new artist ", artistStr );
440 441
                return {nullptr, nullptr};
            }
442
            m_notifier->notifyArtistCreation( albumArtist );
443 444 445 446 447 448 449
        }
    }
    return {albumArtist, artist};
}

/* Tracks handling */

450 451
std::shared_ptr<AlbumTrack> MetadataParser::handleTrack( std::shared_ptr<Album> album, parser::Task& task,
                                                         std::shared_ptr<Artist> artist, Genre* genre ) const
452
{
453 454 455
    auto title = task.vlcMedia.meta( libvlc_meta_Title );
    const auto trackNumber = toInt( task.vlcMedia, libvlc_meta_TrackNumber, "track number" );
    const auto discNumber = toInt( task.vlcMedia, libvlc_meta_DiscNumber, "disc number" );
456 457 458
    if ( title.empty() == true )
    {
        LOG_WARN( "Failed to get track title" );
459
        if ( trackNumber != 0 )
460 461
        {
            title = "Track #";
462
            title += std::to_string( trackNumber );
463 464 465
        }
    }
    if ( title.empty() == false )
Hugo Beauzée-Luyssen's avatar
Hugo Beauzée-Luyssen committed
466
        task.media->setTitleBuffered( title );
467

468 469
    auto track = std::static_pointer_cast<AlbumTrack>( album->addTrack( task.media, trackNumber,
                                                                        discNumber, artist->id(),
470
                                                                        genre ) );
471 472 473 474 475
    if ( track == nullptr )
    {
        LOG_ERROR( "Failed to create album track" );
        return nullptr;
    }
476

477 478
    const auto& releaseDate = task.vlcMedia.meta( libvlc_meta_Date );
    if ( releaseDate.empty() == false )
479
    {
480
        auto releaseYear = atoi( releaseDate.c_str() );
481
        task.media->setReleaseDate( releaseYear );
482 483 484 485 486
        // Let the album handle multiple dates. In order to do this properly, we need
        // to know if the date has been changed before, which can be known only by
        // using Album class internals.
        album->setReleaseYear( releaseYear, false );
    }
487
    m_notifier->notifyAlbumTrackCreation( track );
488 489 490 491 492 493
    return track;
}

/* Misc */

/// Links a media with its album and artists.
///
/// The media is added to its artist(s), and the album's main artist is either
/// assigned, or replaced by the "various artists" entity when tracks from
/// different artists are found on the same album.
///
/// \param media The media being parsed
/// \param album The album the media belongs to
/// \param albumArtist The album artist, or nullptr, in which case artist is used
/// \param artist The track artist, or nullptr
/// \return Always true
bool MetadataParser::link( Media& media, std::shared_ptr<Album> album,
                               std::shared_ptr<Artist> albumArtist, std::shared_ptr<Artist> artist )
{
    // Without an album artist, the track artist acts as one
    if ( albumArtist == nullptr )
    {
        assert( artist != nullptr );
        albumArtist = artist;
    }

    // The album artist may be replaced below, so deal with thumbnails first.
    // A known album artist means the track was properly tagged: the album's
    // artwork can be used for this artist, unless it already has one.
    if ( albumArtist != nullptr && albumArtist->artworkMrl().empty() == true &&
         album != nullptr && album->artworkMrl().empty() == false )
        albumArtist->setArtworkMrl( album->artworkMrl() );

    if ( albumArtist != nullptr )
        albumArtist->addMedia( media );
    if ( artist != nullptr && ( albumArtist == nullptr || albumArtist->id() != artist->id() ) )
        artist->addMedia( media );

    auto currentAlbumArtist = album->albumArtist();

    if ( currentAlbumArtist == nullptr )
    {
        // No main artist yet: assign one. We can't tell whether it was tagged as
        // artist or album artist, so use it as the album artist until one of the
        // next tracks proves us wrong by having a different artist.
        album->setAlbumArtist( albumArtist );
        // Always add the album artist as an artist
        album->addArtist( albumArtist );
        if ( artist != nullptr )
            album->addArtist( artist );
        return true;
    }

    if ( albumArtist->id() != currentAlbumArtist->id() )
    {
        // More than a single artist on this album: fallback to various artists,
        // which are lazily fetched
        if ( m_variousArtists == nullptr )
            m_variousArtists = Artist::fetch( m_ml, VariousArtistID );
        album->setAlbumArtist( m_variousArtists );
        // Add those two artists as "featuring".
        album->addArtist( albumArtist );
    }
    if ( artist != nullptr && artist->id() != albumArtist->id() )
        album->addArtist( artist );

    return true;
}

/// \return The name of this parser service, as a string literal.
const char* MetadataParser::name() const
{
    return "Metadata";
}
554 555 556 557 558 559 560 561 562 563

/// \return The number of threads to use for this parser. Currently always 1.
uint8_t MetadataParser::nbThreads() const
{
    // Disabled implementation, which would use one thread per hardware thread:
//    auto nbProcs = std::thread::hardware_concurrency();
//    if ( nbProcs == 0 )
//        return 1;
//    return nbProcs;
    // Let's make this code thread-safe first :)
    return 1;
}
564

565
/// Checks whether the metadata analysis already ran for a task.
/// \param task The task to check
/// \return true if the MetadataAnalysis bit is set in the task's step
bool MetadataParser::isCompleted( const parser::Task& task ) const
{
    // We always need to run this task if the metadata extraction isn't completed
    const auto completedSteps = static_cast<uint8_t>( task.step );
    const auto analysisStep = static_cast<uint8_t>( parser::Task::ParserStep::MetadataAnalysis );
    return ( completedSteps & analysisStep ) != 0;
}

572
}