#include #include #include #include #include "cli_parse.h" #include "spinner.h" #include "vidinput.h" #include "libvmaf/picture.h" #include "libvmaf/libvmaf.h" static enum VmafPixelFormat pix_fmt_map(int pf) { switch (pf) { case PF_420: return VMAF_PIX_FMT_YUV420P; case PF_422: return VMAF_PIX_FMT_YUV422P; case PF_444: return VMAF_PIX_FMT_YUV444P; default: return VMAF_PIX_FMT_UNKNOWN; } } static int validate_videos(video_input *vid1, video_input *vid2) { int err_cnt = 0; video_input_info info1, info2; video_input_get_info(vid1, &info1); video_input_get_info(vid2, &info2); if ((info1.frame_w != info2.frame_w) || (info1.frame_h != info2.frame_h)) { fprintf(stderr, "dimensions do not match: %dx%d, %dx%d\n", info1.frame_w, info1.frame_h, info2.frame_w, info2.frame_h); err_cnt++; } if (info1.pixel_fmt != info2.pixel_fmt) { fprintf(stderr, "pixel formats do not match: %d, %d\n", info1.pixel_fmt, info2.pixel_fmt); err_cnt++; } if (!pix_fmt_map(info1.pixel_fmt) || !pix_fmt_map(info2.pixel_fmt)) { fprintf(stderr, "unsupported pixel format: %d\n", info1.pixel_fmt); err_cnt++; } if (info1.depth != info2.depth) { fprintf(stderr, "bitdepths do not match: %d, %d\n", info1.depth, info2.depth); err_cnt++; } if (info1.depth < 8 || info1.depth > 16) { fprintf(stderr, "unsupported bitdepth: %d\n", info1.depth); err_cnt++; } //TODO: more validations are possible. return err_cnt; } static int fetch_picture(video_input *vid, VmafPicture *pic) { int ret; video_input_ycbcr ycbcr; video_input_info info; ret = video_input_fetch_frame(vid, ycbcr, NULL); if (ret < 1) return !ret; video_input_get_info(vid, &info); ret = vmaf_picture_alloc(pic, pix_fmt_map(info.pixel_fmt), info.depth, info.pic_w, info.pic_h); if (ret) { fprintf(stderr, "problem allocating picture.\n"); return -1; } if (info.depth == 8) { for (unsigned i = 0; i < 3; i++) { int xdec = i&&!(info.pixel_fmt&1); int ydec = i&&!(info.pixel_fmt&2); int xstride = info.depth > 8 ? 2 : 1; uint8_t *ycbcr_data = ycbcr[i].data + (info.pic_y >> ydec) * ycbcr[i].stride + (info.pic_x * xstride >> xdec); // ^ gross, but this is how the daala y4m API works. FIXME. uint8_t *pic_data = pic->data[i]; for (unsigned j = 0; j < pic->h[i]; j++) { memcpy(pic_data, ycbcr_data, sizeof(*pic_data) * pic->w[i]); pic_data += pic->stride[i]; ycbcr_data += ycbcr[i].stride; } } } else { for (unsigned i = 0; i < 3; i++) { int xdec = i&&!(info.pixel_fmt&1); int ydec = i&&!(info.pixel_fmt&2); int xstride = info.depth > 8 ? 2 : 1; uint16_t *ycbcr_data = (uint16_t*) ycbcr[i].data + (info.pic_y >> ydec) * (ycbcr[i].stride / 2) + (info.pic_x * xstride >> xdec); // ^ gross, but this is how the daala y4m API works. FIXME. uint16_t *pic_data = pic->data[i]; for (unsigned j = 0; j < pic->h[i]; j++) { memcpy(pic_data, ycbcr_data, sizeof(*pic_data) * pic->w[i]); pic_data += pic->stride[i] / 2; ycbcr_data += ycbcr[i].stride / 2; } } } return 0; } int main(int argc, char *argv[]) { int err = 0; const int istty = isatty(fileno(stderr)); CLISettings c; cli_parse(argc, argv, &c); if (istty && !c.quiet) { fprintf(stderr, "VMAF version %s\n", vmaf_version()); } FILE *file_ref = fopen(c.path_ref, "rb"); if (!file_ref) { fprintf(stderr, "could not open file: %s\n", c.path_ref); return -1; } FILE *file_dist = fopen(c.path_dist, "rb"); if (!file_dist) { fprintf(stderr, "could not open file: %s\n", c.path_dist); return -1; } video_input vid_ref; if (c.use_yuv) { err = raw_input_open(&vid_ref, file_ref, c.width, c.height, c.pix_fmt, c.bitdepth); } else { err = video_input_open(&vid_ref, file_ref); } if (err) { fprintf(stderr, "problem with reference file: %s\n", c.path_ref); return -1; } video_input vid_dist; if (c.use_yuv) { err = raw_input_open(&vid_dist, file_dist, c.width, c.height, c.pix_fmt, c.bitdepth); } else { err = video_input_open(&vid_dist, file_dist); } if (err) { fprintf(stderr, "problem with distorted file: %s\n", c.path_dist); return -1; } err = validate_videos(&vid_ref, &vid_dist); if (err) { fprintf(stderr, "videos are incompatible, %d %s.\n", err, err == 1 ? "problem" : "problems"); return -1; } VmafConfiguration cfg = { .log_level = VMAF_LOG_LEVEL_INFO, .n_threads = c.thread_cnt, .n_subsample = c.subsample, .cpumask = c.cpumask, }; VmafContext *vmaf; err = vmaf_init(&vmaf, cfg); if (err) { fprintf(stderr, "problem initializing VMAF context\n"); return -1; } VmafModel **model; const size_t model_sz = sizeof(*model) * c.model_cnt; model = malloc(model_sz); memset(model, 0, model_sz); VmafModelCollection **model_collection; const size_t model_collection_sz = sizeof(*model_collection) * c.model_cnt; model_collection = malloc(model_sz); memset(model_collection, 0, model_collection_sz); const char *model_collection_label[c.model_cnt]; unsigned model_collection_cnt = 0; for (unsigned i = 0; i < c.model_cnt; i++) { if (c.model_config[i].version) { err = vmaf_model_load(&model[i], &c.model_config[i].cfg, c.model_config[i].version); } else { err = vmaf_model_load_from_path(&model[i], &c.model_config[i].cfg, c.model_config[i].path); } if (err) { // check for model_collection before failing // this is implicit because the `--model` option could take either // a model or model_collection if (c.model_config[i].version) { err = vmaf_model_collection_load(&model[i], &model_collection[model_collection_cnt], &c.model_config[i].cfg, c.model_config[i].version); } else { err = vmaf_model_collection_load_from_path(&model[i], &model_collection[model_collection_cnt], &c.model_config[i].cfg, c.model_config[i].path); } if (err) { fprintf(stderr, "problem loading model: %s\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path); return -1; } model_collection_label[model_collection_cnt] = c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path; for (unsigned j = 0; j < c.model_config[i].overload_cnt; j++) { err = vmaf_model_collection_feature_overload( model[i], &model_collection[model_collection_cnt], c.model_config[i].feature_overload[j].name, c.model_config[i].feature_overload[j].opts_dict); if (err) { fprintf(stderr, "problem overloading feature extractors from " "model collection: %s\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path); return -1; } } err = vmaf_use_features_from_model_collection(vmaf, model_collection[model_collection_cnt]); if (err) { fprintf(stderr, "problem loading feature extractors from " "model collection: %s\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path); return -1; } model_collection_cnt++; continue; } for (unsigned j = 0; j < c.model_config[i].overload_cnt; j++) { err = vmaf_model_feature_overload(model[i], c.model_config[i].feature_overload[j].name, c.model_config[i].feature_overload[j].opts_dict); if (err) { fprintf(stderr, "problem overloading feature extractors from " "model: %s\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path); return -1; } } err = vmaf_use_features_from_model(vmaf, model[i]); if (err) { fprintf(stderr, "problem loading feature extractors from model: %s\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path); return -1; } } for (unsigned i = 0; i < c.feature_cnt; i++) { err = vmaf_use_feature(vmaf, c.feature_cfg[i].name, c.feature_cfg[i].opts_dict); if (err) { fprintf(stderr, "problem loading feature extractor: %s\n", c.feature_cfg[i].name); return -1; } } float fps = 0.; const time_t t0 = clock(); unsigned picture_index; for (picture_index = 0 ;; picture_index++) { if (c.frame_cnt && picture_index >= c.frame_cnt) break; VmafPicture pic_ref, pic_dist; int ret1 = fetch_picture(&vid_ref, &pic_ref); int ret2 = fetch_picture(&vid_dist, &pic_dist); if (ret1 && ret2) { break; } else if (ret1 < 0 || ret2 < 0) { fprintf(stderr, "\nproblem while reading pictures\n"); break; } else if (ret1) { fprintf(stderr, "\n\"%s\" ended before \"%s\".\n", c.path_ref, c.path_dist); break; } else if (ret2) { fprintf(stderr, "\n\"%s\" ended before \"%s\".\n", c.path_dist, c.path_ref); break; } if (istty && !c.quiet) { if (picture_index > 0 && !(picture_index % 10)) { fps = (picture_index + 1) / (((float)clock() - t0) / CLOCKS_PER_SEC); } fprintf(stderr, "\r%d frame%s %s %.2f FPS\033[K", picture_index + 1, picture_index ? "s" : " ", spinner[picture_index % spinner_length], fps); fflush(stderr); } err = vmaf_read_pictures(vmaf, &pic_ref, &pic_dist, picture_index); if (err) { fprintf(stderr, "\nproblem reading pictures\n"); break; } } if (istty && !c.quiet) fprintf(stderr, "\n"); err |= vmaf_read_pictures(vmaf, NULL, NULL, 0); if (err) { fprintf(stderr, "problem flushing context\n"); return err; } if (!c.no_prediction) { for (unsigned i = 0; i < c.model_cnt; i++) { double vmaf_score; err = vmaf_score_pooled(vmaf, model[i], VMAF_POOL_METHOD_MEAN, &vmaf_score, 0, picture_index - 1); if (err) { fprintf(stderr, "problem generating pooled VMAF score\n"); return -1; } if (istty && (!c.quiet || !c.output_path)) { fprintf(stderr, "%s: %f\n", c.model_config[i].version ? c.model_config[i].version : c.model_config[i].path, vmaf_score); } } for (unsigned i = 0; i < model_collection_cnt; i++) { VmafModelCollectionScore score = { 0 }; err = vmaf_score_pooled_model_collection(vmaf, model_collection[i], VMAF_POOL_METHOD_MEAN, &score, 0, picture_index - 1); if (err) { fprintf(stderr, "problem generating pooled VMAF score\n"); return -1; } switch (score.type) { case VMAF_MODEL_COLLECTION_SCORE_BOOTSTRAP: if (istty && (!c.quiet || !c.output_path)) { fprintf(stderr, "%s: %f, ci.p95: [%f, %f], stddev: %f\n", model_collection_label[i], score.bootstrap.bagging_score, score.bootstrap.ci.p95.lo, score.bootstrap.ci.p95.hi, score.bootstrap.stddev); } break; default: break; } } } if (c.output_path) vmaf_write_output(vmaf, c.output_path, c.output_fmt); for (unsigned i = 0; i < c.model_cnt; i++) vmaf_model_destroy(model[i]); free(model); for (unsigned i = 0; i < model_collection_cnt; i++) vmaf_model_collection_destroy(model_collection[i]); free(model_collection); video_input_close(&vid_ref); video_input_close(&vid_dist); vmaf_close(vmaf); cli_free(&c); return err; }